library(Biostrings)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.3.1
## Loading required package: XVector
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.3.1
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
library(ggplot2)
library(gggenes)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
##
## collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following object is masked from 'package:XVector':
##
## slice
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
Sequences downloaded from ncbi have long names that I reformat this way:
First use this :
sed -E -i '' 's/(^>[A-Z]*_[0-9][0-9]*)\.[0-9]_.*\[(.*)]/\1_\2/g' contig_*.aln
This will change the names such as :
>YP_009337856.1_RNA-dependent_RNA_polymerase_[Wenling_crustacean_virus_14]
into
>YP_009337856_Wenling_crustacean_virus_14
Then some sequences have different structure names. This second sed command will reformat them also :
sed -E -i '' 's/(^>[A-Z][A-Z]*[0-9][0-9]*)\.[0-9]_.*\[(.*)]/\1_\2/g' contig_*.aln
this will change ids such as >AWA82254.1_hypothetical_protein,partial[Mogami_virus] into >AWA82254_Mogami_virus
Finally, remove any special characters that are not allowed in sequence names in Seaview.
sed -i '' 's/(//g' contig*.aln
sed -i '' 's/)//g' contig*.aln
sed -i '' 's/,//g' contig*.aln
sed -i '' 's/;//g' contig*.aln
sed -i '' 's/://g' contig*.aln
Sequences homologous to ours were obtained by a blastp approach (mmseqs2), using an e-value threshold of 1e-10. For some of our sequences, this approach retrieved too many sequences for ML phylogenetic reconstruction. In those cases, we first built an NJ phylogeny to select the subset of sequences used in the ML phylogenies.
import all WGA contigs and their gff
# Load every WGA contig listed in the FASTA file as a named sequence set, then
# preview the first records.
contigs_wga <- readBStringSet(
  "../sequences/wga_final_contigs_with_unassigned.fa"
)
head(contigs_wga)
## BStringSet object of length 6:
## width seq names
## [1] 1613 AGGAGAAGGAGAGACTAAAAAGC...TCGAAGCGTAAGGGAAAAGGAGC contig_10471
## [2] 1503 TCTGGGCTGGTCTTATGGGGGGA...TTCATGATGTCCGTCAATTCTGT contig_11634
## [3] 1484 TTTTTTTTTCAAGCAGAAGACGG...ATGGCCCCCCACCCCCAAACACA contig_11850
## [4] 1450 TTGTTGAAATTTATTAAATAAAT...GTTTATAAATAATTTTTTTTTGC contig_12283
## [5] 10490 TTATAAATTTTAAATGAAAAGAA...TTTTTGGGTCACAATGGTTTTTG contig_1269
## [6] 7285 TCATTTAAAAAATTTACAAATTT...GAAAAAAAATATTATAAAAAAAA contig_1350
# Import the ORF predictions (GFF) of the assigned contigs, plus the two
# prediction files of the unassigned contigs (getorf option 0 and option 1).
gff_wga <- read.table("../sequences/wga_final_contigs.gff")
gff_wga_unassigned0 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option0.gff")
gff_wga_unassigned1 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option1.gff")

# The three tables share the standard nine GFF column names.
gff_columns <- c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes")
names(gff_wga) <- gff_columns
names(gff_wga_unassigned0) <- gff_columns
names(gff_wga_unassigned1) <- gff_columns

# The phase column is reused to record which getorf option produced the ORF:
# phase 1 => option 1; phase 2 => option 0.
gff_wga$phase <- 1
gff_wga_unassigned1$phase <- 1
gff_wga_unassigned0$phase <- 2
gff_wga <- rbind(gff_wga, gff_wga_unassigned0, gff_wga_unassigned1)

# Recode the strand as a "FALSE"/"TRUE" factor ("+" => "TRUE", "-" => "FALSE").
# The mapping is explicit: relabelling the levels of as.factor(strand) by
# position depends on how the locale sorts "+" and "-" (the C locale puts "+"
# first, which would swap the strands) and mislabels a file holding one strand.
strand_code <- c("-" = "FALSE", "+" = "TRUE")
strand_label <- unname(strand_code[as.character(gff_wga$strand)])
if (anyNA(strand_label)) {
  warning("some GFF rows have a strand other than '+' or '-'; set to NA",
          call. = FALSE)
}
gff_wga$strand <- factor(strand_label, levels = c("FALSE", "TRUE"))
gff_wga$phase <- as.factor(gff_wga$phase)
head(gff_wga)
Add contig length
# Contig lengths, named by contig id, taken from the sequence widths.
contig_length <- setNames(width(contigs_wga), names(contigs_wga))
# Join the lengths onto the ORF table by contig id (the vector's names act as
# row names); the appended 10th column is then given a meaningful name.
gff_wga <- merge(
  x = gff_wga, y = contig_length,
  by.x = "seqid", by.y = "row.names"
)
names(gff_wga)[10] <- "seq_length"
Add an orf id column
# ORF identifier: "<seqid>_<start>_<end>_<strand>", where the strand label
# ("TRUE"/"FALSE") is turned back into the usual "+"/"-" sign.
orf_names <- paste(
  gff_wga[["seqid"]], gff_wga[["start"]], gff_wga[["end"]], gff_wga[["strand"]],
  sep = "_"
)
orf_names <- sub(pattern = "FALSE", "-", x = sub(pattern = "TRUE", "+", x = orf_names))
gff_wga[["orf_name"]] <- orf_names
import blastp results
# Labels for the 12 columns of the tabular blastp output (one row per hit).
names <- c(
  "query_id", "subject_id", "identity", "alignment_length",
  "mismatches", "gap_opens", "qstart", "qend",
  "sstart", "send", "evalue", "bitscore"
)
# Read the hits and label the columns in one step.
wga_blast <- setNames(
  read.table("../TABLES/wga_final_contigs_getorf.blastp.tab"),
  names
)
head(wga_blast)
import subject_id sequences (with informative names)
# Protein sequences of the BLAST subjects; their FASTA headers carry the
# accession followed by a free-text description.
subject_id_seqs <- readBStringSet(
  "../sequences/wga_protein_homologs.fasta"
)
subject_id_seqs
## BStringSet object of length 1230:
## width seq names
## [1] 604 MNIVENSIFLSNLMKSANTFEL...SLRKCPICRSTIKGTVRTFLS NP_001156.1 bacul...
## [2] 618 MHKTASQRLFPGPSYQNIKSIM...SLRKCPICRGIIKGTVRTFLS NP_001157.1 bacul...
## [3] 148 MKALIVLGLVLLSVTVQGKVFE...VAWRNRCQNRDVRQYVQGCGV NP_000230.1 lysoz...
## [4] 437 MTTSTLQKAIDLVTKATEEDKA...TVNADDLLKVKKFSEDFGQES NP_037377.1 vacuo...
## [5] 618 MTDRGTNNDDWYIVDEAECRDD...ESDGKPQQPLRLATRAASNSI NP_040898.1 hypot...
## ... ... ...
## [1226] 140 MKAFFALVLLAIAASAMAGRTL...WSAWAVWHYCSGWLPSIDECF XP_016022646.2 ly...
## [1227] 140 MKAFIVLVALACAAPAFARTMD...WSAWSTWHYCSGWLPSIDDCF XP_039149016.1 ly...
## [1228] 140 MKAFIVLVALACAAPAFARTMD...WSAWSTWHYCSGWLPSIDDCF XP_016029777.2 ly...
## [1229] 1033 MFPPRLLRIAFVICLLIVLLSP...ENVCDWPENVEGCHTPTEAPA XP_039149439.1 pr...
## [1230] 159 MKAWGTVVVTLATLMVVTVDAK...DLSEWLKGCDMHVKIDPKIHP NP_001381227.1 sp...
# Split every FASTA header on spaces once: the first token is the accession,
# the remaining tokens (re-joined with spaces) form the annotation.
header_tokens <- strsplit(names(subject_id_seqs), " ")
short_names <- vapply(header_tokens, function(tokens) tokens[1], character(1))
annotation <- vapply(
  header_tokens,
  function(tokens) paste0(tokens[-1], collapse = " "),
  character(1)
)
df <- data.frame(short_names, annotation)
Add this information to blast output
# Left join: every BLAST hit is kept and receives its subject's annotation
# when the accession is present in `df`.
wga_blast2 <- merge(
  x = wga_blast, y = df,
  by.x = "subject_id", by.y = "short_names",
  all.x = TRUE, all.y = FALSE
)
Combine this to the gff
# Best hit per ORF: sort all hits by increasing e-value, then keep the first
# row of each query group (i.e. the hit with the smallest e-value).
wga_blast2_besthit <- wga_blast2 %>%
  arrange(evalue) %>%
  group_by(query_id) %>%
  dplyr::slice(1)
# Left join onto the ORF table: ORFs without any hit keep NA in the BLAST columns.
gff_wga2 <- merge(
  x = gff_wga, y = wga_blast2_besthit,
  by.x = "orf_name", by.y = "query_id",
  all.x = TRUE, all.y = FALSE
)
write to disk:
# Export the annotated ORF table as a tab-separated file with a header line
# (no quoting, no row names), then preview it.
write.table(
  gff_wga2,
  file = "../figures/orf_predictions/gff_wga2.txt",
  sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = TRUE
)
head(gff_wga2)
# Taxonomic information for the homologous proteins: semicolon-separated table
# with a header line.
wga_taxo_info <- read.table(
  "../TABLES/wga_protein_homologs.ids_taxid.txt",
  sep = ";", header = TRUE
)
dim(wga_taxo_info)
## [1] 1230 28
head(wga_taxo_info)
Define family colours
library(RColorBrewer)
# One colour per virus family: the levels of the family factor are relabelled,
# in order, with 24 ColorBrewer colours (12 from "Set3", then 12 from "Paired").
# NOTE(review): this presumably requires at most 24 distinct families - confirm.
wga_taxo_info$family_colour <- as.factor(wga_taxo_info$family)
levels(wga_taxo_info$family_colour) <- unlist(
  lapply(c("Set3", "Paired"), function(pal) brewer.pal(12, name = pal))
)
library(ggplot2)
library(ape)
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
## The following object is masked from 'package:Biostrings':
##
## complement
library(ggtree)
## ggtree v3.8.0 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## G Yu. Data Integration, Manipulation and Visualization of Phylogenetic
## Trees (1st ed.). Chapman and Hall/CRC. 2022. ISBN: 9781032233574
##
## Guangchuang Yu. Data Integration, Manipulation and Visualization of
## Phylogenetic Trees (1st edition). Chapman and Hall/CRC. 2022,
## doi:10.1201/9781003279242
##
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
##
## rotate
## The following object is masked from 'package:Biostrings':
##
## collapse
## The following object is masked from 'package:IRanges':
##
## collapse
## The following object is masked from 'package:S4Vectors':
##
## expand
library(dplyr)
# Draw a phylogenetic tree with tip labels coloured by virus family.
#
# Arguments:
#   file       Path to a Newick tree file. Square brackets found in the file
#              are rewritten IN PLACE ("[" becomes "-", "]" is removed) before
#              the tree is read.
#   taxo_info  Data frame with a `target` column (accession, optionally with a
#              ".<version>" suffix) and a `family` column.
#   x_max      Upper limit of the x axis (default 7, the previous fixed value).
#
# Returns the ggtree plot. Node labels that are numeric and above 0.7 are
# printed as support values. Family colours are read from
# "../TABLES/colour_code_virus_families.txt".
plot_phylogeny <- function(file = "../phylogenies/contig_2320_1853_2266_+_with_homologs-PhyML_tree3",
                           taxo_info = wga_taxo_info,
                           x_max = 7) {
  # Replace brackets in the tree file. Done in R instead of calling a
  # hard-coded Homebrew `gsed` through the shell, so it works on any platform
  # and with any path (spaces, quotes, ...). The file is only rewritten when
  # something actually changes.
  file <- as.character(file)
  newick <- readLines(file, warn = FALSE)
  cleaned <- gsub("]", "", gsub("[", "-", newick, fixed = TRUE), fixed = TRUE)
  if (!identical(cleaned, newick)) {
    writeLines(cleaned, file)
  }
  tree <- read.tree(file, comment.char = "")

  # Strip the ".<version>" suffix of accessions (all digits of the version, not
  # only the first one) so they match the ids used in the alignment files.
  taxo_info$target <- gsub(pattern = "\\.[0-9]+", replacement = "", x = taxo_info$target)

  # Protein id of each tip = part of the label before the first "_<letter>".
  prot_ids <- vapply(
    strsplit(tree$tip.label, "_[a-zA-Z]"),
    function(x) paste0(x[1]),
    character(1)
  )
  prot_ids <- gsub(pattern = "\\.[0-9]+", replacement = "", x = prot_ids)

  # Attach the taxonomy to the tips (tips without taxonomy are kept).
  d <- data.frame(tree$tip.label, prot_ids)
  dd <- merge(d, taxo_info, by.x = "prot_ids", by.y = "target", all.x = TRUE, all.y = FALSE)
  dd[grepl(pattern = "contig", x = dd$prot_ids), "family"] <- "This paper"
  dd$family[is.na(dd$family)] <- "Unassigned" # replace NA by unassigned
  # IMPORTANT: reorder columns, because %<+% requires the first column to
  # match the node/tip labels of the tree.
  # https://bioconductor.riken.jp/packages/3.4/bioc/vignettes/ggtree/inst/doc/treeAnnotation.html
  # NOTE(review): this also drops column 3 (the first taxo_info column after
  # the merge key) - confirm that this is intended.
  dd <- dd[, c(2, 1, 4:dim(dd)[2])]

  # Colour code: a table of family / colour pairs; comment.char is disabled
  # when reading it.
  ddd <- read.table("../TABLES/colour_code_virus_families.txt", header = TRUE, sep = "\t", comment.char = "")
  names(ddd) <- c("family", "family_colour")
  # Keep the families present in this tree and map colours BY NAME, so that the
  # mapping cannot shift when a family is missing from the colour table.
  ddd <- ddd[ddd$family %in% unique(dd$family), ]
  family_colours <- setNames(as.vector(ddd$family_colour), ddd$family)
  uncoloured <- unique(dd$family)[!(unique(dd$family) %in% names(family_colours))]
  if (length(uncoloured) > 0) {
    warning("no colour defined for: ", paste(uncoloured, collapse = ", "),
            call. = FALSE)
  }

  p <- ggtree(tree)
  p <- p %<+% dd +
    theme_tree() +
    # plot only aLRT > 0.70; non-numeric labels give NA and are not drawn
    geom_text2(aes(label = label, subset = as.numeric(label) > 0.7), size = 2) +
    geom_treescale(x = 0, y = -2, linesize = 0.25, fontsize = 5) + # adds the scale
    xlim(0, x_max) +
    theme(
      legend.position = c("right"),
      legend.key.size = unit(20, 'cm'),   # legend key size
      legend.key.height = unit(1, 'cm'),  # legend key height
      legend.key.width = unit(1, 'cm'),   # legend key width
      legend.title = element_blank(),     # no legend title
      legend.text = element_text(size = 12), # legend text font size
      aspect.ratio = 1
    )
  p <- p + geom_tiplab(aes(color = family), size = 3) +
    scale_color_manual(values = family_colours)
  # enlarge the legend symbols
  p <- p + guides(color = guide_legend(override.aes = list(size = 6)))
  p
}
Create a list to put tentative virus names, their corresponding contigs (either with or without hits). Contigs without hits co-occur with the other ones (dark matter…)
# Registry filled section by section: one entry per tentative virus, holding
# its contig ids.
virus_list <- list()
Function for plotting orf predictions
# Plot the predicted ORFs of a set of contigs as gene arrows (one facet per
# contig) and save the figure as "../figures/orf_predictions/<name>.pdf".
#
# Arguments:
#   contig_set             Character vector of contig ids to draw.
#   contig_set_unassigned  Further contig ids to draw, or NA for none.
#   gff                    ORF table with columns seqid, start, end, strand,
#                          phase and seq_length.
#                          NOTE(review): the plot also maps `label = annotation`,
#                          so `gff` presumably needs an `annotation` column
#                          (present in gff_wga2) - confirm.
#   name                   Basename (without extension) of the PDF file.
#
# Prints the selected rows and returns list(<plot>, <selected rows>).
#
# The defaults used to be `contig_set = contig_set` (self-referential), which
# fails with "promise already under evaluation" when the argument is omitted;
# `contig_set` is now simply required and the unassigned set defaults to NA.
plot_orfs <- function(contig_set, contig_set_unassigned = NA, gff = gff_wga, name = "name") {
  tab <- gff[which(gff$seqid %in% c(contig_set, contig_set_unassigned)), ]
  print(tab)
  p <- ggplot(tab, aes(xmin = start, xmax = end, y = seqid, forward = strand, label = annotation)) +
    geom_gene_arrow(aes(lty = phase)) +
    facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
    theme_genes() +
    # White segments drawn from the contig end rightwards and from 0 leftwards
    # (presumably to hide the track line outside the contig).
    # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0.
    geom_segment(aes(y = seqid, yend = seqid, x = seq_length), xend = 100000, colour = "white", linewidth = 2) +
    geom_segment(aes(y = seqid, yend = seqid, x = 0), xend = -100000, colour = "white", linewidth = 2) +
    xlab("") + ylab("")
  ggsave(
    filename = paste0("../figures/orf_predictions/", name, ".pdf"),
    plot = p, device = "pdf"
  )
  list(p, tab)
}
Define the corresponding contigs :
# Contigs of this virus: one with homologs, no unassigned contig.
contig_set <- "contig_2320"
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[["Parvoviridae_Pachy"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw and save the ORF map; `res` holds list(<plot>, <ORF table>).
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Parvoviridae_Pachy"
)
## orf_name seqid source type start end score strand
## 780 contig_2320_144_1853_+ contig_2320 getorf_JV gene 144 1853 . TRUE
## 781 contig_2320_1853_2266_+ contig_2320 getorf_JV gene 1853 2266 . TRUE
## 782 contig_2320_2273_2926_- contig_2320 getorf_JV gene 2273 2926 . FALSE
## 783 contig_2320_2607_3260_- contig_2320 getorf_JV gene 2607 3260 . FALSE
## 784 contig_2320_3303_4526_- contig_2320 getorf_JV gene 3303 4526 . FALSE
## 785 contig_2320_4530_4817_- contig_2320 getorf_JV gene 4530 4817 . FALSE
## 786 contig_2320_4874_5044_+ contig_2320 getorf_JV gene 4874 5044 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 780 1 1710 5045 YP_009552708.1 0.262 444
## 781 1 414 5045 YP_009256211.1 0.529 119
## 782 1 654 5045 <NA> NA NA
## 783 1 654 5045 <NA> NA NA
## 784 1 1224 5045 YP_009256212.1 0.313 201
## 785 1 288 5045 NP_051016.1 0.473 91
## 786 1 171 5045 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 780 262 0 120 563 87 442 1.890e-23 111
## 781 56 0 2 120 557 675 6.803e-38 142
## 782 NA NA NA NA NA NA NA NA
## 783 NA NA NA NA NA NA NA NA
## 784 135 0 211 407 185 385 4.945e-25 114
## 785 45 0 5 95 152 237 1.055e-13 70
## 786 NA NA NA NA NA NA NA NA
## annotation
## 780 putative NS1 [Bombus cryptarum densovirus]
## 781 putative nonstructural protein NS1 [Diaphorina citri densovirus]
## 782 <NA>
## 783 <NA>
## 784 putative structural protein [Diaphorina citri densovirus]
## 785 structural protein [Periplaneta fuliginosa densovirus]
## 786 <NA>
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 780 contig_2320_144_1853_+ contig_2320 getorf_JV gene 144 1853 . TRUE
## 781 contig_2320_1853_2266_+ contig_2320 getorf_JV gene 1853 2266 . TRUE
## 782 contig_2320_2273_2926_- contig_2320 getorf_JV gene 2273 2926 . FALSE
## 783 contig_2320_2607_3260_- contig_2320 getorf_JV gene 2607 3260 . FALSE
## 784 contig_2320_3303_4526_- contig_2320 getorf_JV gene 3303 4526 . FALSE
## 785 contig_2320_4530_4817_- contig_2320 getorf_JV gene 4530 4817 . FALSE
## 786 contig_2320_4874_5044_+ contig_2320 getorf_JV gene 4874 5044 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 780 1 1710 5045 YP_009552708.1 0.262 444
## 781 1 414 5045 YP_009256211.1 0.529 119
## 782 1 654 5045 <NA> NA NA
## 783 1 654 5045 <NA> NA NA
## 784 1 1224 5045 YP_009256212.1 0.313 201
## 785 1 288 5045 NP_051016.1 0.473 91
## 786 1 171 5045 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 780 262 0 120 563 87 442 1.890e-23 111
## 781 56 0 2 120 557 675 6.803e-38 142
## 782 NA NA NA NA NA NA NA NA
## 783 NA NA NA NA NA NA NA NA
## 784 135 0 211 407 185 385 4.945e-25 114
## 785 45 0 5 95 152 237 1.055e-13 70
## 786 NA NA NA NA NA NA NA NA
## annotation
## 780 putative NS1 [Bombus cryptarum densovirus]
## 781 putative nonstructural protein NS1 [Diaphorina citri densovirus]
## 782 <NA>
## 783 <NA>
## 784 putative structural protein [Diaphorina citri densovirus]
## 785 structural protein [Periplaneta fuliginosa densovirus]
## 786 <NA>
The third and fourth ORFs overlap extensively. Both were kept by our ORF predictor script because they have exactly the same length (654 nt). Neither of them has homologs in the database.
# ORF table returned by plot_orfs(), with a hand-written short annotation for
# each of its seven ORFs (stored in the table, not drawn on the plot).
tab <- res[[2]]
tab$annotation2 <- c("NS prot", "NS prot", "", "" , "structural prot", "struct prot", "")
# Gene-arrow map, one facet per contig. The two white segments run from the
# contig end rightwards and from 0 leftwards.
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2 3.4.0.
p <- ggplot(tab, aes(xmin = start, xmax = end, y = seqid, forward = strand)) +
  geom_gene_arrow(aes(lty = phase)) +
  facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
  theme_genes() +
  geom_segment(aes(y = seqid, yend = seqid, x = seq_length), xend = 100000, colour = "white", linewidth = 2) +
  geom_segment(aes(y = seqid, yend = seqid, x = 0), xend = -100000, colour = "white", linewidth = 2) +
  xlab("") + ylab("")
ggsave(
  filename = "../figures/orf_predictions/densovirus.pdf",
  plot = p
)
## Saving 7 x 5 in image
p
We found the typical inverted terminal repeat (ITR) at the 3’end (but not at the 5’ end).
Score = 198 bits (107), Expect = 2e-51
Identities = 148/165 (90%), Gaps = 14/165 (8%)
Strand=Plus/Minus
Query 4882 TCGAGTGAAACTGACGAACCTCAAAGCCCCTCCTCGATGTATACCCCCAACACACAAAAC 4941
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Sbjct 5039 TCGAGTGAAACTGACGAACCTCAAAGCCCCTCCTCGATGTATACCCCCAACACACAAAAC 4980
Query 4942 CATGGCCTATATAATCATGACAAAGTC--GA-T-TAT-GGCC--GGTTTTGTGTGTTGGG 4994
| |||| |||||| ||| ||| || | ||| |||| ||||||||||||||||
Sbjct 4979 C--GGCC---ATAATC--GACTTTGTCATGATTATATAGGCCATGGTTTTGTGTGTTGGG 4927
Query 4995 GGTATACATCGAGGAGGGGCTTTGAGGTTCGTCAGTTTCACTCGA 5039
|||||||||||||||||||||||||||||||||||||||||||||
Sbjct 4926 GGTATACATCGAGGAGGGGCTTTGAGGTTCGTCAGTTTCACTCGA 4882
We built a phylogenetic tree based on NS1 protein (ORF 1853-2266).
# Tree built from the ORF 1853-2266 alignment, tips coloured by family.
p <- plot_phylogeny(
  file = "../phylogenies/contig_2320_1853_2266_+_with_homologs-PhyML_tree",
  taxo_info = wga_taxo_info
)
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
ref : see Nigg & Falk 2020 JGV
12 segments are found, 2 of which have no homologs but share the same distribution. Note that 2 additional ones have similar distribution but were discarded because they have low coding density ( “contig_21183”, “contig_20579”).
Define the corresponding contigs :
# Contigs with protein homologs, and co-occurring contigs without homologs.
contig_set <- c("contig_2799", "contig_14992", "contig_2780", "contig_2857", "contig_22871", "contig_2659", "contig_8503", "contig_15585")
contig_set_unassigned <- c("contig_7654", "contig_17519")
# Export both sets of sequences as FASTA.
writeXStringSet(contigs_wga[contig_set], "../sequences/Vesantovirus_D.sub.fa")
writeXStringSet(contigs_wga[contig_set_unassigned], "../sequences/Vesantovirus_D.sub_unassigned.fa")
# store for later fusion of corresponding lines
virus_list[["Vesantovirus_D.sub"]] <- list(contig_set = contig_set, contig_set_unassigned = contig_set_unassigned)
tab <- gff_wga2[which(gff_wga2$seqid %in% c(contig_set, contig_set_unassigned)), ]
# contig_22871_59_3268_+ and contig_2659_3157_3486_- are not annotated
# correctly: overwrite their annotation by hand.
tab$annotation[tab$orf_name == "contig_22871_59_3268_+"] <- "DNA pol [Vesanto virus]"
tab$annotation[tab$orf_name == "contig_2659_3157_3486_-"] <- ""
tab
# Gene-arrow map, one facet per contig. The two white segments run from the
# contig end rightwards and from 0 leftwards.
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2 3.4.0.
p <- ggplot(tab, aes(xmin = start, xmax = end, y = seqid, forward = strand)) +
  geom_gene_arrow(aes(lty = phase)) +
  facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
  theme_genes() +
  geom_segment(aes(y = seqid, yend = seqid, x = seq_length), xend = 100000, colour = "white", linewidth = 2) +
  geom_segment(aes(y = seqid, yend = seqid, x = 0), xend = -100000, colour = "white", linewidth = 2) +
  xlab("") + ylab("")
ggsave(
  filename = "../figures/orf_predictions/Vesantovirus_Dsub.pdf",
  plot = p
)
## Saving 7 x 5 in image
p
Looking at the assembly graph revealed the presence of inverted terminal repeats for the different segments, as is expected for segmented viruses. However, based on our data (composed only of short reads), it is rather unclear how to reconstitute the whole segment sequences. The contigs we deposited in the databases should thus be seen as incomplete fragments of the different segments, and further analysis is required to resolve the full sequence organization.
graph seen using Bandage software. Colours represent sequence homology with the 8 contigs.
Phylogeny
We built a phylogenetic tree based on ORF 940-2433 corresponding to NS1 protein.
# Tree of the ORF 940-2433 alignment; the x axis is then widened to 0-16,
# replacing the x scale set inside plot_phylogeny().
p <- plot_phylogeny(
  file = "../phylogenies/contig_2659_940_2433_+_with_homologs-PhyML_tree"
)
p <- p + xlim(0, 16)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the widened tree next to the tree file.
ggsave(
  filename = "../phylogenies/contig_2659_940_2433_+_with_homologs-PhyML_tree2.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Looking at unassigned contigs revealed that 4 contigs (contigs 7654, 21183, 20579 and 17519) co-occurred with the 8 contigs previously identified by protein sequence homology (blastx). Three of these four unassigned contigs (contigs 21183, 20579 and 17519) have clear DNA sequence homology with segment 12 of a Vesantovirus found in D. melanogaster (MT496878.1). They may correspond to three fragments of a single segment that we were unable to assemble. We were unable to identify any sequence homology (either at the protein level or at the nucleotide level) for contig 7654 (1991 bp). It is however very likely that this sequence does belong to the Vesantovirus genome.
this homology was detected in the second round of blastx (mmseqs2) on whole nr
# Single contig for this virus, no unassigned contig.
contig_set <- c("contig_15192")
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[["Parvoviridae2"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw and save the ORF map; `res` holds list(<plot>, <ORF table>).
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Parvoviridae2"
)
## orf_name seqid source type start end score strand
## 254 contig_15192_2_1099_- contig_15192 getorf_JV gene 2 1099 . FALSE
## 255 contig_15192_2_1219_- contig_15192 getorf_JV gene 2 1219 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 254 1 1098 1263 <NA> NA NA NA
## 255 2 1218 1263 <NA> NA NA NA
## gap_opens qstart qend sstart send evalue bitscore annotation
## 254 NA NA NA NA NA NA NA <NA>
## 255 NA NA NA NA NA NA NA <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 254 contig_15192_2_1099_- contig_15192 getorf_JV gene 2 1099 . FALSE
## 255 contig_15192_2_1219_- contig_15192 getorf_JV gene 2 1219 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 254 1 1098 1263 <NA> NA NA NA
## 255 2 1218 1263 <NA> NA NA NA
## gap_opens qstart qend sstart send evalue bitscore annotation
## 254 NA NA NA NA NA NA NA <NA>
## 255 NA NA NA NA NA NA NA <NA>
Phylogeny
We built a phylogenetic tree based on ORF 940-2433 corresponding to NS1 protein.
# Tree of contig_15192 and its nr homologs, displayed with the x axis narrowed
# to 0-3 (the narrowed plot is shown but not stored back into `p`).
p <- plot_phylogeny(
  file = "../phylogenies/contig_15192_homolog_nr-PhyML_tree"
)
p + xlim(0, 3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
#ggsave(filename = "../phylogenies/contig_15192_homolog_nr-PhyML_tree.pdf", plot = p)
This virus was previously described in D. simulans, the host in which we also found it.
# Two contigs for this virus, no unassigned contig.
contig_set <- c("contig_627", "contig_626")
contig_set_unassigned <- NA
# Export the contig sequences as FASTA.
writeXStringSet(
  contigs_wga[contig_set],
  "../sequences/Linvil_road_virus_D.sim.fa"
)
# store for later fusion of corresponding lines
virus_list[["Linvill_road_virus_D.sim"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw and save the ORF map; `res` holds list(<plot>, <ORF table>).
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Linvill_road_virus_D.sim"
)
## orf_name seqid source type start end score strand
## 1041 contig_626_120_659_+ contig_626 getorf_JV gene 120 659 . TRUE
## 1042 contig_626_1297_2685_- contig_626 getorf_JV gene 1297 2685 . FALSE
## 1043 contig_626_2723_3127_- contig_626 getorf_JV gene 2723 3127 . FALSE
## 1044 contig_626_3206_3856_- contig_626 getorf_JV gene 3206 3856 . FALSE
## 1045 contig_626_649_999_- contig_626 getorf_JV gene 649 999 . FALSE
## 1046 contig_627_2_1036_- contig_627 getorf_JV gene 2 1036 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 1041 1 540 3917 <NA> NA NA
## 1042 1 1389 3917 AQN78651.1 0.991 463
## 1043 1 405 3917 NP_051016.1 0.445 92
## 1044 1 651 3917 AQN78650.1 1.000 217
## 1045 1 351 3917 <NA> NA NA
## 1046 1 1035 1154 AQN78650.1 0.997 345
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 1041 NA NA NA NA NA NA NA NA
## 1042 4 0 1 463 1 463 6.637e-314 957
## 1043 50 0 6 97 152 242 4.364e-18 85
## 1044 0 0 1 217 358 574 1.606e-148 465
## 1045 NA NA NA NA NA NA NA NA
## 1046 1 0 1 345 1 345 2.570e-228 703
## annotation
## 1041 <NA>
## 1042 putative protein 2 [Linvill Road virus]
## 1043 structural protein [Periplaneta fuliginosa densovirus]
## 1044 putative protein 1 [Linvill Road virus]
## 1045 <NA>
## 1046 putative protein 1 [Linvill Road virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 1041 contig_626_120_659_+ contig_626 getorf_JV gene 120 659 . TRUE
## 1042 contig_626_1297_2685_- contig_626 getorf_JV gene 1297 2685 . FALSE
## 1043 contig_626_2723_3127_- contig_626 getorf_JV gene 2723 3127 . FALSE
## 1044 contig_626_3206_3856_- contig_626 getorf_JV gene 3206 3856 . FALSE
## 1045 contig_626_649_999_- contig_626 getorf_JV gene 649 999 . FALSE
## 1046 contig_627_2_1036_- contig_627 getorf_JV gene 2 1036 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 1041 1 540 3917 <NA> NA NA
## 1042 1 1389 3917 AQN78651.1 0.991 463
## 1043 1 405 3917 NP_051016.1 0.445 92
## 1044 1 651 3917 AQN78650.1 1.000 217
## 1045 1 351 3917 <NA> NA NA
## 1046 1 1035 1154 AQN78650.1 0.997 345
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 1041 NA NA NA NA NA NA NA NA
## 1042 4 0 1 463 1 463 6.637e-314 957
## 1043 50 0 6 97 152 242 4.364e-18 85
## 1044 0 0 1 217 358 574 1.606e-148 465
## 1045 NA NA NA NA NA NA NA NA
## 1046 1 0 1 345 1 345 2.570e-228 703
## annotation
## 1041 <NA>
## 1042 putative protein 2 [Linvill Road virus]
## 1043 structural protein [Periplaneta fuliginosa densovirus]
## 1044 putative protein 1 [Linvill Road virus]
## 1045 <NA>
## 1046 putative protein 1 [Linvill Road virus]
# Contigs of this virus.
contig_set <- paste0("contig_", c(1505, 22345, 1350, 22895, 22365, 22449, 22533, 19307, 12283, 22381))
# No unassigned contig here. Set explicitly instead of silently reusing the
# value left over from the previous section, so that this chunk gives the same
# result when it is run on its own or after a section with unassigned contigs.
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[["LbFV_L.b"]] <- list(contig_set = contig_set, contig_set_unassigned = contig_set_unassigned)
# Draw and save the ORF map; `res` holds list(<plot>, <ORF table>).
res <- plot_orfs(contig_set = contig_set, contig_set_unassigned = contig_set_unassigned, gff = gff_wga2, name = "LbFV_L.b")
## orf_name seqid source type start end score
## 132 contig_12283_3_152_- contig_12283 getorf_JV gene 3 152 .
## 133 contig_12283_387_539_- contig_12283 getorf_JV gene 387 539 .
## 134 contig_12283_693_878_- contig_12283 getorf_JV gene 693 878 .
## 197 contig_1350_1158_1364_+ contig_1350 getorf_JV gene 1158 1364 .
## 198 contig_1350_1472_1645_- contig_1350 getorf_JV gene 1472 1645 .
## 199 contig_1350_1824_2081_- contig_1350 getorf_JV gene 1824 2081 .
## 200 contig_1350_2089_4023_+ contig_1350 getorf_JV gene 2089 4023 .
## 201 contig_1350_3_257_- contig_1350 getorf_JV gene 3 257 .
## 202 contig_1350_315_680_- contig_1350 getorf_JV gene 315 680 .
## 203 contig_1350_4200_5126_+ contig_1350 getorf_JV gene 4200 5126 .
## 204 contig_1350_5229_6740_- contig_1350 getorf_JV gene 5229 6740 .
## 205 contig_1350_680_997_- contig_1350 getorf_JV gene 680 997 .
## 206 contig_1350_6947_7102_- contig_1350 getorf_JV gene 6947 7102 .
## 246 contig_1505_1234_3102_- contig_1505 getorf_JV gene 1234 3102 .
## 247 contig_1505_2_1186_- contig_1505 getorf_JV gene 2 1186 .
## 248 contig_1505_3572_5626_+ contig_1505 getorf_JV gene 3572 5626 .
## 249 contig_1505_5654_6397_- contig_1505 getorf_JV gene 5654 6397 .
## 478 contig_19307_248_433_+ contig_19307 getorf_JV gene 248 433 .
## 479 contig_19307_587_739_+ contig_19307 getorf_JV gene 587 739 .
## 649 contig_22345_1017_2027_+ contig_22345 getorf_JV gene 1017 2027 .
## 650 contig_22345_12095_12271_- contig_22345 getorf_JV gene 12095 12271 .
## 651 contig_22345_12686_15022_+ contig_22345 getorf_JV gene 12686 15022 .
## 652 contig_22345_15388_15663_+ contig_22345 getorf_JV gene 15388 15663 .
## 653 contig_22345_15826_16143_- contig_22345 getorf_JV gene 15826 16143 .
## 654 contig_22345_16234_16485_- contig_22345 getorf_JV gene 16234 16485 .
## 655 contig_22345_16630_17409_- contig_22345 getorf_JV gene 16630 17409 .
## 656 contig_22345_17417_17965_- contig_22345 getorf_JV gene 17417 17965 .
## 657 contig_22345_179_625_- contig_22345 getorf_JV gene 179 625 .
## 658 contig_22345_18010_20094_- contig_22345 getorf_JV gene 18010 20094 .
## 659 contig_22345_20331_21344_+ contig_22345 getorf_JV gene 20331 21344 .
## 660 contig_22345_2055_2882_+ contig_22345 getorf_JV gene 2055 2882 .
## 661 contig_22345_21392_21547_- contig_22345 getorf_JV gene 21392 21547 .
## 662 contig_22345_21617_22618_- contig_22345 getorf_JV gene 21617 22618 .
## 663 contig_22345_22700_23647_- contig_22345 getorf_JV gene 22700 23647 .
## 664 contig_22345_23792_23962_+ contig_22345 getorf_JV gene 23792 23962 .
## 665 contig_22345_2958_4103_+ contig_22345 getorf_JV gene 2958 4103 .
## 666 contig_22345_4521_5198_- contig_22345 getorf_JV gene 4521 5198 .
## 667 contig_22345_5208_5906_- contig_22345 getorf_JV gene 5208 5906 .
## 668 contig_22345_5931_6245_- contig_22345 getorf_JV gene 5931 6245 .
## 669 contig_22345_6770_7300_- contig_22345 getorf_JV gene 6770 7300 .
## 670 contig_22345_694_918_- contig_22345 getorf_JV gene 694 918 .
## 671 contig_22345_7624_8520_- contig_22345 getorf_JV gene 7624 8520 .
## 672 contig_22345_9649_11379_+ contig_22345 getorf_JV gene 9649 11379 .
## 673 contig_22365_10325_10870_- contig_22365 getorf_JV gene 10325 10870 .
## 674 contig_22365_10956_11165_- contig_22365 getorf_JV gene 10956 11165 .
## 675 contig_22365_11308_16086_- contig_22365 getorf_JV gene 11308 16086 .
## 676 contig_22365_1219_1794_+ contig_22365 getorf_JV gene 1219 1794 .
## 677 contig_22365_16399_16557_- contig_22365 getorf_JV gene 16399 16557 .
## 678 contig_22365_16643_17776_- contig_22365 getorf_JV gene 16643 17776 .
## 679 contig_22365_18040_18654_+ contig_22365 getorf_JV gene 18040 18654 .
## 680 contig_22365_1842_2921_+ contig_22365 getorf_JV gene 1842 2921 .
## 681 contig_22365_18779_19024_- contig_22365 getorf_JV gene 18779 19024 .
## 682 contig_22365_19027_19554_- contig_22365 getorf_JV gene 19027 19554 .
## 683 contig_22365_19642_19848_- contig_22365 getorf_JV gene 19642 19848 .
## 684 contig_22365_19829_20473_- contig_22365 getorf_JV gene 19829 20473 .
## 685 contig_22365_3229_4116_+ contig_22365 getorf_JV gene 3229 4116 .
## 686 contig_22365_356_1024_- contig_22365 getorf_JV gene 356 1024 .
## 687 contig_22365_4145_4450_+ contig_22365 getorf_JV gene 4145 4450 .
## 688 contig_22365_4548_5465_- contig_22365 getorf_JV gene 4548 5465 .
## 689 contig_22365_5498_6112_- contig_22365 getorf_JV gene 5498 6112 .
## 690 contig_22365_6127_6996_- contig_22365 getorf_JV gene 6127 6996 .
## 691 contig_22365_7069_10212_+ contig_22365 getorf_JV gene 7069 10212 .
## 692 contig_22381_10447_10659_- contig_22381 getorf_JV gene 10447 10659 .
## 693 contig_22381_10655_11848_- contig_22381 getorf_JV gene 10655 11848 .
## 694 contig_22381_12147_13829_- contig_22381 getorf_JV gene 12147 13829 .
## 695 contig_22381_13930_14562_+ contig_22381 getorf_JV gene 13930 14562 .
## 696 contig_22381_14583_15050_- contig_22381 getorf_JV gene 14583 15050 .
## 697 contig_22381_1502_2212_+ contig_22381 getorf_JV gene 1502 2212 .
## 698 contig_22381_15095_15760_+ contig_22381 getorf_JV gene 15095 15760 .
## 699 contig_22381_15756_16229_+ contig_22381 getorf_JV gene 15756 16229 .
## 700 contig_22381_16303_17325_- contig_22381 getorf_JV gene 16303 17325 .
## 701 contig_22381_2291_2440_+ contig_22381 getorf_JV gene 2291 2440 .
## 702 contig_22381_2686_2895_- contig_22381 getorf_JV gene 2686 2895 .
## 703 contig_22381_2915_3178_+ contig_22381 getorf_JV gene 2915 3178 .
## 704 contig_22381_3197_3406_- contig_22381 getorf_JV gene 3197 3406 .
## 705 contig_22381_322_1329_- contig_22381 getorf_JV gene 322 1329 .
## 706 contig_22381_3527_4222_+ contig_22381 getorf_JV gene 3527 4222 .
## 707 contig_22381_4311_9146_+ contig_22381 getorf_JV gene 4311 9146 .
## 708 contig_22381_9132_10343_- contig_22381 getorf_JV gene 9132 10343 .
## 709 contig_22449_11022_11357_+ contig_22449 getorf_JV gene 11022 11357 .
## 710 contig_22449_113_1198_+ contig_22449 getorf_JV gene 113 1198 .
## 711 contig_22449_11417_11641_+ contig_22449 getorf_JV gene 11417 11641 .
## 712 contig_22449_1235_1882_+ contig_22449 getorf_JV gene 1235 1882 .
## 713 contig_22449_1927_5850_- contig_22449 getorf_JV gene 1927 5850 .
## 714 contig_22449_5834_5983_- contig_22449 getorf_JV gene 5834 5983 .
## 715 contig_22449_5967_6521_+ contig_22449 getorf_JV gene 5967 6521 .
## 716 contig_22449_6524_7117_- contig_22449 getorf_JV gene 6524 7117 .
## 717 contig_22449_7138_7470_+ contig_22449 getorf_JV gene 7138 7470 .
## 718 contig_22449_7535_7786_+ contig_22449 getorf_JV gene 7535 7786 .
## 719 contig_22449_7877_8995_+ contig_22449 getorf_JV gene 7877 8995 .
## 720 contig_22449_8985_10409_+ contig_22449 getorf_JV gene 8985 10409 .
## 732 contig_22533_3206_4132_+ contig_22533 getorf_JV gene 3206 4132 .
## 733 contig_22533_383_616_- contig_22533 getorf_JV gene 383 616 .
## 734 contig_22533_4132_5184_+ contig_22533 getorf_JV gene 4132 5184 .
## 735 contig_22533_5193_6800_- contig_22533 getorf_JV gene 5193 6800 .
## 736 contig_22533_7018_7335_+ contig_22533 getorf_JV gene 7018 7335 .
## 737 contig_22533_7389_8687_+ contig_22533 getorf_JV gene 7389 8687 .
## 738 contig_22533_9105_9551_- contig_22533 getorf_JV gene 9105 9551 .
## 739 contig_22533_944_2971_+ contig_22533 getorf_JV gene 944 2971 .
## 740 contig_22533_9620_9844_- contig_22533 getorf_JV gene 9620 9844 .
## 763 contig_22895_2774_3307_+ contig_22895 getorf_JV gene 2774 3307 .
## 764 contig_22895_317_466_+ contig_22895 getorf_JV gene 317 466 .
## 765 contig_22895_3498_3944_+ contig_22895 getorf_JV gene 3498 3944 .
## 766 contig_22895_442_2628_+ contig_22895 getorf_JV gene 442 2628 .
## strand phase attributes seq_length subject_id identity alignment_length
## 132 FALSE 1 150 1450 YP_009345650.1 0.938 49
## 133 FALSE 1 153 1450 YP_009345649.1 1.000 51
## 134 FALSE 1 186 1450 YP_009345648.1 1.000 62
## 197 TRUE 1 207 7285 YP_009345675.1 1.000 69
## 198 FALSE 1 174 7285 YP_009345674.1 1.000 58
## 199 FALSE 1 258 7285 YP_009345673.1 1.000 86
## 200 TRUE 1 1935 7285 YP_009345672.1 1.000 645
## 201 FALSE 1 255 7285 YP_009345678.1 0.825 103
## 202 FALSE 1 366 7285 YP_009345677.1 1.000 122
## 203 TRUE 1 927 7285 YP_009345671.1 1.000 309
## 204 FALSE 1 1512 7285 YP_009345670.1 1.000 504
## 205 FALSE 1 318 7285 YP_009345676.1 1.000 106
## 206 FALSE 1 156 7285 YP_009345669.1 1.000 52
## 246 FALSE 1 1869 7049 YP_009345711.1 0.992 625
## 247 FALSE 1 1185 7049 YP_009345712.1 1.000 395
## 248 TRUE 1 2055 7049 YP_009345710.1 1.000 685
## 249 FALSE 1 744 7049 YP_009345709.1 1.000 248
## 478 TRUE 1 186 1091 YP_009345648.1 1.000 62
## 479 TRUE 1 153 1091 YP_009345649.1 1.000 51
## 649 TRUE 1 1011 24107 YP_009345626.1 1.000 337
## 650 FALSE 1 177 24107 YP_009345616.1 1.000 59
## 651 TRUE 1 2337 24107 YP_009345615.1 1.000 779
## 652 TRUE 1 276 24107 YP_009345614.1 1.000 92
## 653 FALSE 1 318 24107 YP_009345613.1 1.000 106
## 654 FALSE 1 252 24107 YP_009345612.1 1.000 84
## 655 FALSE 1 780 24107 YP_009345611.1 1.000 260
## 656 FALSE 1 549 24107 YP_009345610.1 1.000 183
## 657 FALSE 1 447 24107 YP_009345628.1 1.000 149
## 658 FALSE 1 2085 24107 YP_009345609.1 0.998 696
## 659 TRUE 1 1014 24107 YP_009345608.1 1.000 338
## 660 TRUE 1 828 24107 YP_009345625.1 1.000 276
## 661 FALSE 1 156 24107 YP_009345607.1 1.000 52
## 662 FALSE 1 1002 24107 YP_009345606.1 1.000 334
## 663 FALSE 1 948 24107 YP_009345605.1 1.000 316
## 664 TRUE 1 171 24107 YP_009345630.1 0.791 24
## 665 TRUE 1 1146 24107 YP_009345624.1 1.000 382
## 666 FALSE 1 678 24107 YP_009345623.1 1.000 226
## 667 FALSE 1 699 24107 YP_009345622.1 1.000 233
## 668 FALSE 1 315 24107 YP_009345621.1 1.000 105
## 669 FALSE 1 531 24107 YP_009345619.1 1.000 177
## 670 FALSE 1 225 24107 YP_009345627.1 1.000 75
## 671 FALSE 1 897 24107 YP_009345618.1 1.000 299
## 672 TRUE 1 1731 24107 YP_009345617.1 1.000 576
## 673 FALSE 1 546 20889 YP_009345698.1 1.000 182
## 674 FALSE 1 210 20889 YP_009345697.1 1.000 70
## 675 FALSE 1 4779 20889 YP_009345696.1 1.000 1593
## 676 TRUE 1 576 20889 YP_009345707.1 1.000 192
## 677 FALSE 1 159 20889 YP_009345695.1 1.000 53
## 678 FALSE 1 1134 20889 YP_009345694.1 1.000 378
## 679 TRUE 1 615 20889 YP_009345693.1 1.000 205
## 680 TRUE 1 1080 20889 YP_009345706.1 1.000 360
## 681 FALSE 1 246 20889 YP_009345692.1 1.000 82
## 682 FALSE 1 528 20889 YP_009345691.1 1.000 176
## 683 FALSE 1 207 20889 YP_009345690.1 1.000 69
## 684 FALSE 1 645 20889 YP_009345689.1 1.000 215
## 685 TRUE 1 888 20889 YP_009345705.1 1.000 296
## 686 FALSE 1 669 20889 YP_009345708.1 1.000 223
## 687 TRUE 1 306 20889 YP_009345704.1 1.000 102
## 688 FALSE 1 918 20889 YP_009345703.1 1.000 306
## 689 FALSE 1 615 20889 YP_009345702.1 1.000 205
## 690 FALSE 1 870 20889 YP_009345701.1 1.000 290
## 691 TRUE 1 3144 20889 YP_009345700.1 1.000 1048
## 692 FALSE 1 213 17653 YP_009345638.1 1.000 71
## 693 FALSE 1 1194 17653 YP_009345637.1 1.000 398
## 694 FALSE 1 1683 17653 YP_009345636.1 0.998 561
## 695 TRUE 1 633 17653 YP_009345635.1 1.000 211
## 696 FALSE 1 468 17653 YP_009345634.1 1.000 156
## 697 TRUE 1 711 17653 YP_009345646.1 1.000 237
## 698 TRUE 1 666 17653 YP_009345633.1 1.000 222
## 699 TRUE 1 474 17653 YP_009345632.1 1.000 158
## 700 FALSE 1 1023 17653 YP_009345631.1 1.000 341
## 701 TRUE 1 150 17653 YP_009345645.1 1.000 50
## 702 FALSE 1 210 17653 YP_009345644.1 1.000 70
## 703 TRUE 1 264 17653 YP_009345643.1 0.988 88
## 704 FALSE 1 210 17653 YP_009345642.1 1.000 70
## 705 FALSE 1 1008 17653 YP_009345647.1 1.000 336
## 706 TRUE 1 696 17653 YP_009345641.1 1.000 232
## 707 TRUE 1 4836 17653 YP_009345640.1 0.999 1612
## 708 FALSE 1 1212 17653 YP_009345639.1 1.000 404
## 709 TRUE 1 336 11641 YP_009345654.1 1.000 112
## 710 TRUE 1 1086 11641 YP_009345664.1 1.000 362
## 711 TRUE 1 225 11641 YP_009345653.1 1.000 75
## 712 TRUE 1 648 11641 YP_009345663.1 1.000 216
## 713 FALSE 1 3924 11641 YP_009345662.1 1.000 1308
## 714 FALSE 1 150 11641 YP_009345661.1 1.000 50
## 715 TRUE 1 555 11641 YP_009345660.1 1.000 185
## 716 FALSE 1 594 11641 YP_009345659.1 1.000 198
## 717 TRUE 1 333 11641 YP_009345658.1 1.000 111
## 718 TRUE 1 252 11641 YP_009345657.1 1.000 84
## 719 TRUE 1 1119 11641 YP_009345656.1 1.000 373
## 720 TRUE 1 1425 11641 YP_009345655.1 0.951 499
## 732 TRUE 1 927 9985 YP_009345683.1 1.000 309
## 733 FALSE 1 234 9985 YP_009345681.1 1.000 78
## 734 TRUE 1 1053 9985 YP_009345684.1 1.000 351
## 735 FALSE 1 1608 9985 YP_009345685.1 1.000 536
## 736 TRUE 1 318 9985 YP_009345686.1 1.000 106
## 737 TRUE 1 1299 9985 YP_009345687.1 1.000 433
## 738 FALSE 1 447 9985 YP_009345628.1 1.000 149
## 739 TRUE 1 2028 9985 YP_009345682.1 1.000 676
## 740 FALSE 1 225 9985 YP_009345627.1 1.000 75
## 763 TRUE 1 534 3946 YP_009345666.1 1.000 178
## 764 TRUE 1 150 3946 YP_009345668.1 1.000 50
## 765 TRUE 1 447 3946 YP_009345665.1 1.000 149
## 766 TRUE 1 2187 3946 YP_009345667.1 0.998 729
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 132 3 0 1 49 1 49 2.198e-22 92
## 133 0 0 1 51 1 51 2.328e-26 104
## 134 0 0 1 62 1 62 1.779e-35 130
## 197 0 0 1 69 1 69 2.387e-38 139
## 198 0 0 1 58 1 58 6.934e-31 117
## 199 0 0 1 86 1 86 2.175e-49 172
## 200 0 0 1 645 1 645 0.000e+00 1326
## 201 15 0 1 85 1 103 2.046e-38 140
## 202 0 0 1 122 1 122 1.196e-72 241
## 203 0 0 1 309 1 309 4.807e-203 628
## 204 0 0 1 504 1 504 0.000e+00 1092
## 205 0 0 1 106 1 106 2.906e-66 222
## 206 0 0 1 52 1 52 4.308e-29 111
## 246 5 0 1 623 1 625 0.000e+00 1234
## 247 0 0 1 395 1 395 1.306e-236 730
## 248 0 0 1 685 1 685 0.000e+00 1406
## 249 0 0 1 248 1 248 6.623e-164 511
## 478 0 0 1 62 1 62 1.779e-35 130
## 479 0 0 1 51 1 51 2.328e-26 104
## 649 0 0 1 337 1 337 2.746e-213 659
## 650 0 0 1 59 1 59 9.222e-30 114
## 651 0 0 1 779 1 779 0.000e+00 1557
## 652 0 0 1 92 1 92 1.679e-57 196
## 653 0 0 1 106 1 106 6.555e-61 206
## 654 0 0 1 84 1 84 6.745e-45 159
## 655 0 0 1 260 1 260 3.213e-167 522
## 656 0 0 1 183 1 183 1.607e-105 339
## 657 0 0 1 149 1 149 7.276e-98 315
## 658 1 0 1 695 1 696 0.000e+00 1358
## 659 0 0 1 338 1 338 4.436e-224 690
## 660 0 0 1 276 1 276 7.814e-175 544
## 661 0 0 1 52 1 52 9.466e-27 105
## 662 0 0 1 334 1 334 5.990e-216 666
## 663 0 0 1 316 1 316 1.406e-213 659
## 664 5 0 23 46 39 62 4.492e-05 43
## 665 0 0 1 382 1 382 1.715e-258 792
## 666 0 0 1 226 1 226 3.943e-143 450
## 667 0 0 1 233 1 233 7.519e-143 450
## 668 0 0 1 105 1 105 3.630e-62 210
## 669 0 0 1 177 1 177 1.009e-93 305
## 670 0 0 1 75 1 75 2.350e-38 139
## 671 0 0 1 299 1 299 1.230e-197 612
## 672 0 0 1 576 1 576 0.000e+00 1163
## 673 0 0 1 182 1 182 1.141e-122 389
## 674 0 0 1 70 1 70 8.911e-38 138
## 675 0 0 1 1593 1 1593 0.000e+00 3186
## 676 0 0 1 192 1 192 1.111e-120 383
## 677 0 0 1 53 1 53 3.547e-26 103
## 678 0 0 1 378 1 378 3.323e-244 751
## 679 0 0 1 205 1 205 4.974e-126 400
## 680 0 0 1 360 1 360 1.576e-245 753
## 681 0 0 1 82 1 82 6.666e-46 162
## 682 0 0 1 176 1 176 1.115e-108 348
## 683 0 0 1 69 1 69 1.380e-39 143
## 684 0 0 1 215 1 215 2.196e-139 439
## 685 0 0 1 296 1 296 2.457e-186 579
## 686 0 0 1 223 1 223 3.763e-144 453
## 687 0 0 1 102 1 102 6.453e-63 212
## 688 0 0 1 306 1 306 1.280e-194 603
## 689 0 0 1 205 1 205 3.866e-130 411
## 690 0 0 1 290 1 290 3.392e-189 587
## 691 0 0 1 1048 1 1048 0.000e+00 2079
## 692 0 0 1 71 1 71 7.899e-36 132
## 693 0 0 1 398 1 398 9.802e-265 811
## 694 1 0 1 561 1 561 0.000e+00 1134
## 695 0 0 1 211 1 211 1.410e-133 422
## 696 0 0 1 156 1 156 1.533e-98 317
## 697 0 0 1 237 1 237 3.203e-142 448
## 698 0 0 1 222 1 222 1.080e-138 437
## 699 0 0 1 158 1 158 8.791e-92 298
## 700 0 0 1 341 1 341 6.779e-240 736
## 701 0 0 1 50 1 50 1.080e-25 102
## 702 0 0 1 70 1 70 1.640e-33 125
## 703 1 0 1 88 1 88 1.231e-45 161
## 704 0 0 1 70 1 70 1.679e-37 137
## 705 0 0 1 336 1 336 2.054e-217 671
## 706 0 0 1 232 1 232 7.006e-156 487
## 707 2 0 1 1612 1 1612 0.000e+00 3178
## 708 0 0 1 404 1 404 2.685e-265 813
## 709 0 0 1 112 1 112 2.482e-68 228
## 710 0 0 1 362 1 362 1.094e-237 731
## 711 0 0 1 75 1 75 1.284e-42 152
## 712 0 0 1 216 1 216 7.395e-138 434
## 713 0 0 1 1308 1 1308 0.000e+00 2644
## 714 0 0 1 50 1 50 3.574e-28 109
## 715 0 0 1 185 1 185 6.417e-112 358
## 716 0 0 1 198 1 198 2.604e-126 400
## 717 0 0 1 111 1 111 1.437e-59 203
## 718 0 0 1 84 1 84 9.579e-49 170
## 719 0 0 1 373 1 373 6.608e-247 758
## 720 23 0 1 475 1 499 2.161e-300 918
## 732 0 0 1 309 1 309 1.881e-190 591
## 733 0 0 1 78 1 78 5.174e-49 170
## 734 0 0 1 351 1 351 2.875e-223 689
## 735 0 0 1 536 1 536 0.000e+00 1064
## 736 0 0 1 106 1 106 1.233e-67 226
## 737 0 0 1 433 1 433 9.237e-280 856
## 738 0 0 1 149 1 149 7.276e-98 315
## 739 0 0 1 676 1 676 0.000e+00 1362
## 740 0 0 1 75 1 75 2.350e-38 139
## 763 0 0 1 178 1 178 4.563e-122 387
## 764 0 0 1 50 1 50 1.271e-27 107
## 765 0 0 1 149 1 149 1.179e-92 300
## 766 1 0 1 729 1 729 0.000e+00 1454
## annotation
## 132 hypothetical protein LbFV_ORF46 [Leptopilina boulardi filamentous virus]
## 133 hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 134 hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 197 hypothetical protein LbFV_ORF71 [Leptopilina boulardi filamentous virus]
## 198 hypothetical protein LbFV_ORF70 [Leptopilina boulardi filamentous virus]
## 199 hypothetical protein LbFV_ORF69 [Leptopilina boulardi filamentous virus]
## 200 hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 201 hypothetical protein LbFV_ORF74 [Leptopilina boulardi filamentous virus]
## 202 hypothetical protein LbFV_ORF73 [Leptopilina boulardi filamentous virus]
## 203 hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 204 putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 205 hypothetical protein LbFV_ORF72 [Leptopilina boulardi filamentous virus]
## 206 hypothetical protein LbFV_ORF65 [Leptopilina boulardi filamentous virus]
## 246 hypothetical protein LbFV_ORF107 [Leptopilina boulardi filamentous virus]
## 247 hypothetical protein LbFV_ORF108 [Leptopilina boulardi filamentous virus]
## 248 putative ODV protein [Leptopilina boulardi filamentous virus]
## 249 hypothetical protein LbFV_ORF105 [Leptopilina boulardi filamentous virus]
## 478 hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 479 hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 649 hypothetical protein LbFV_ORF22 [Leptopilina boulardi filamentous virus]
## 650 hypothetical protein LbFV_ORF12 [Leptopilina boulardi filamentous virus]
## 651 JmJC domain protein [Leptopilina boulardi filamentous virus]
## 652 hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 653 hypothetical protein LbFV_ORF9 [Leptopilina boulardi filamentous virus]
## 654 hypothetical protein LbFV_ORF8 [Leptopilina boulardi filamentous virus]
## 655 hypothetical protein LbFV_ORF7 [Leptopilina boulardi filamentous virus]
## 656 hypothetical protein LbFV_ORF6 [Leptopilina boulardi filamentous virus]
## 657 hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 658 hypothetical protein LbFV_ORF5 [Leptopilina boulardi filamentous virus]
## 659 hypothetical protein LbFV_ORF4 [Leptopilina boulardi filamentous virus]
## 660 hypothetical protein LbFV_ORF21 [Leptopilina boulardi filamentous virus]
## 661 hypothetical protein LbFV_ORF3 [Leptopilina boulardi filamentous virus]
## 662 hypothetical protein LbFV_ORF2 [Leptopilina boulardi filamentous virus]
## 663 hypothetical protein LbFV_ORF1 [Leptopilina boulardi filamentous virus]
## 664 hypothetical protein LbFV_ORF26 [Leptopilina boulardi filamentous virus]
## 665 hypothetical protein LbFV_ORF20 [Leptopilina boulardi filamentous virus]
## 666 hypothetical protein LbFV_ORF19 [Leptopilina boulardi filamentous virus]
## 667 hypothetical protein LbFV_ORF18 [Leptopilina boulardi filamentous virus]
## 668 hypothetical protein LbFV_ORF17 [Leptopilina boulardi filamentous virus]
## 669 hypothetical protein LbFV_ORF15 [Leptopilina boulardi filamentous virus]
## 670 hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 671 BRO family [Leptopilina boulardi filamentous virus]
## 672 JmJC domain protein [Leptopilina boulardi filamentous virus]
## 673 hypothetical protein LbFV_ORF94 [Leptopilina boulardi filamentous virus]
## 674 hypothetical protein LbFV_ORF93 [Leptopilina boulardi filamentous virus]
## 675 hypothetical protein LbFV_ORF92 [Leptopilina boulardi filamentous virus]
## 676 hypothetical protein LbFV_ORF103 [Leptopilina boulardi filamentous virus]
## 677 hypothetical protein LbFV_ORF91 [Leptopilina boulardi filamentous virus]
## 678 hypothetical protein LbFV_ORF90 [Leptopilina boulardi filamentous virus]
## 679 hypothetical protein LbFV_ORF89 [Leptopilina boulardi filamentous virus]
## 680 hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 681 hypothetical protein LbFV_ORF88 [Leptopilina boulardi filamentous virus]
## 682 hypothetical protein LbFV_ORF87 [Leptopilina boulardi filamentous virus]
## 683 hypothetical protein LbFV_ORF86 [Leptopilina boulardi filamentous virus]
## 684 hypothetical protein LbFV_ORF85 [Leptopilina boulardi filamentous virus]
## 685 MSV199 domain protein [Leptopilina boulardi filamentous virus]
## 686 hypothetical protein LbFV_ORF104 [Leptopilina boulardi filamentous virus]
## 687 hypothetical protein LbFV_ORF100 [Leptopilina boulardi filamentous virus]
## 688 hypothetical protein LbFV_ORF99 [Leptopilina boulardi filamentous virus]
## 689 hypothetical protein LbFV_ORF98 [Leptopilina boulardi filamentous virus]
## 690 hypothetical protein LbFV_ORF97 [Leptopilina boulardi filamentous virus]
## 691 hypothetical protein LbFV_ORF96 [Leptopilina boulardi filamentous virus]
## 692 hypothetical protein LbFV_ORF34 [Leptopilina boulardi filamentous virus]
## 693 hypothetical protein LbFV_ORF33 [Leptopilina boulardi filamentous virus]
## 694 hypothetical protein LbFV_ORF32 [Leptopilina boulardi filamentous virus]
## 695 hypothetical protein LbFV_ORF31 [Leptopilina boulardi filamentous virus]
## 696 hypothetical protein LbFV_ORF30 [Leptopilina boulardi filamentous virus]
## 697 hypothetical protein LbFV_ORF42 [Leptopilina boulardi filamentous virus]
## 698 hypothetical protein LbFV_ORF29 [Leptopilina boulardi filamentous virus]
## 699 hypothetical protein LbFV_ORF28 [Leptopilina boulardi filamentous virus]
## 700 putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 701 hypothetical protein LbFV_ORF41 [Leptopilina boulardi filamentous virus]
## 702 hypothetical protein LbFV_ORF40 [Leptopilina boulardi filamentous virus]
## 703 hypothetical protein LbFV_ORF39 [Leptopilina boulardi filamentous virus]
## 704 hypothetical protein LbFV_ORF38 [Leptopilina boulardi filamentous virus]
## 705 hypothetical protein LbFV_ORF43 [Leptopilina boulardi filamentous virus]
## 706 nudix domain protein [Leptopilina boulardi filamentous virus]
## 707 hypothetical protein LbFV_ORF36 [Leptopilina boulardi filamentous virus]
## 708 hypothetical protein LbFV_ORF35 [Leptopilina boulardi filamentous virus]
## 709 putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 710 putative lecithin:cholesterol acyltransferase [Leptopilina boulardi filamentous virus]
## 711 putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 712 hypothetical protein LbFV_ORF59 [Leptopilina boulardi filamentous virus]
## 713 putative DNA pol [Leptopilina boulardi filamentous virus]
## 714 hypothetical protein LbFV_ORF57 [Leptopilina boulardi filamentous virus]
## 715 hypothetical protein LbFV_ORF56 [Leptopilina boulardi filamentous virus]
## 716 hypothetical protein LbFV_ORF55 [Leptopilina boulardi filamentous virus]
## 717 hypothetical protein LbFV_ORF54 [Leptopilina boulardi filamentous virus]
## 718 hypothetical protein LbFV_ORF53 [Leptopilina boulardi filamentous virus]
## 719 hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 720 hypothetical protein LbFV_ORF51 [Leptopilina boulardi filamentous virus]
## 732 hypothetical protein LbFV_ORF79 [Leptopilina boulardi filamentous virus]
## 733 hypothetical protein LbFV_ORF77 [Leptopilina boulardi filamentous virus]
## 734 hypothetical protein LbFV_ORF80 [Leptopilina boulardi filamentous virus]
## 735 putative ATPase [Leptopilina boulardi filamentous virus]
## 736 hypothetical protein LbFV_ORF82 [Leptopilina boulardi filamentous virus]
## 737 hypothetical protein LbFV_ORF83 [Leptopilina boulardi filamentous virus]
## 738 hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 739 hypothetical protein LbFV_ORF78 [Leptopilina boulardi filamentous virus]
## 740 hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 763 hypothetical protein LbFV_ORF62 [Leptopilina boulardi filamentous virus]
## 764 hypothetical protein LbFV_ORF64 [Leptopilina boulardi filamentous virus]
## 765 mucin-like protein [Leptopilina boulardi filamentous virus]
## 766 hypothetical protein LbFV_ORF63 [Leptopilina boulardi filamentous virus]
## Saving 7 x 5 in image
# Show the first element of `res`. Single-bracket indexing keeps the list
# wrapper (a length-one list is returned, not the element itself), which is
# why the echoed output starts with a `[[1]]` header.
# NOTE(review): `res` is created outside this chunk; its first element is
# presumably the plot announced by the "Saving 7 x 5 in image" message --
# confirm. Use `res[[1]]` if the bare element is wanted.
res[1]
## [[1]]
# Show the second element of `res` as a length-one list (hence the `[[1]]`
# header in the output). The element prints as a data frame with one row per
# ORF: its coordinates (seqid, start, end, strand) followed by alignment-hit
# columns (subject_id, identity, evalue, bitscore, annotation, ...).
# NOTE(review): `res` is created outside this chunk; use `res[[2]]` if the
# bare data frame is wanted instead of a list wrapper.
res[2]
## [[1]]
## orf_name seqid source type start end score
## 132 contig_12283_3_152_- contig_12283 getorf_JV gene 3 152 .
## 133 contig_12283_387_539_- contig_12283 getorf_JV gene 387 539 .
## 134 contig_12283_693_878_- contig_12283 getorf_JV gene 693 878 .
## 197 contig_1350_1158_1364_+ contig_1350 getorf_JV gene 1158 1364 .
## 198 contig_1350_1472_1645_- contig_1350 getorf_JV gene 1472 1645 .
## 199 contig_1350_1824_2081_- contig_1350 getorf_JV gene 1824 2081 .
## 200 contig_1350_2089_4023_+ contig_1350 getorf_JV gene 2089 4023 .
## 201 contig_1350_3_257_- contig_1350 getorf_JV gene 3 257 .
## 202 contig_1350_315_680_- contig_1350 getorf_JV gene 315 680 .
## 203 contig_1350_4200_5126_+ contig_1350 getorf_JV gene 4200 5126 .
## 204 contig_1350_5229_6740_- contig_1350 getorf_JV gene 5229 6740 .
## 205 contig_1350_680_997_- contig_1350 getorf_JV gene 680 997 .
## 206 contig_1350_6947_7102_- contig_1350 getorf_JV gene 6947 7102 .
## 246 contig_1505_1234_3102_- contig_1505 getorf_JV gene 1234 3102 .
## 247 contig_1505_2_1186_- contig_1505 getorf_JV gene 2 1186 .
## 248 contig_1505_3572_5626_+ contig_1505 getorf_JV gene 3572 5626 .
## 249 contig_1505_5654_6397_- contig_1505 getorf_JV gene 5654 6397 .
## 478 contig_19307_248_433_+ contig_19307 getorf_JV gene 248 433 .
## 479 contig_19307_587_739_+ contig_19307 getorf_JV gene 587 739 .
## 649 contig_22345_1017_2027_+ contig_22345 getorf_JV gene 1017 2027 .
## 650 contig_22345_12095_12271_- contig_22345 getorf_JV gene 12095 12271 .
## 651 contig_22345_12686_15022_+ contig_22345 getorf_JV gene 12686 15022 .
## 652 contig_22345_15388_15663_+ contig_22345 getorf_JV gene 15388 15663 .
## 653 contig_22345_15826_16143_- contig_22345 getorf_JV gene 15826 16143 .
## 654 contig_22345_16234_16485_- contig_22345 getorf_JV gene 16234 16485 .
## 655 contig_22345_16630_17409_- contig_22345 getorf_JV gene 16630 17409 .
## 656 contig_22345_17417_17965_- contig_22345 getorf_JV gene 17417 17965 .
## 657 contig_22345_179_625_- contig_22345 getorf_JV gene 179 625 .
## 658 contig_22345_18010_20094_- contig_22345 getorf_JV gene 18010 20094 .
## 659 contig_22345_20331_21344_+ contig_22345 getorf_JV gene 20331 21344 .
## 660 contig_22345_2055_2882_+ contig_22345 getorf_JV gene 2055 2882 .
## 661 contig_22345_21392_21547_- contig_22345 getorf_JV gene 21392 21547 .
## 662 contig_22345_21617_22618_- contig_22345 getorf_JV gene 21617 22618 .
## 663 contig_22345_22700_23647_- contig_22345 getorf_JV gene 22700 23647 .
## 664 contig_22345_23792_23962_+ contig_22345 getorf_JV gene 23792 23962 .
## 665 contig_22345_2958_4103_+ contig_22345 getorf_JV gene 2958 4103 .
## 666 contig_22345_4521_5198_- contig_22345 getorf_JV gene 4521 5198 .
## 667 contig_22345_5208_5906_- contig_22345 getorf_JV gene 5208 5906 .
## 668 contig_22345_5931_6245_- contig_22345 getorf_JV gene 5931 6245 .
## 669 contig_22345_6770_7300_- contig_22345 getorf_JV gene 6770 7300 .
## 670 contig_22345_694_918_- contig_22345 getorf_JV gene 694 918 .
## 671 contig_22345_7624_8520_- contig_22345 getorf_JV gene 7624 8520 .
## 672 contig_22345_9649_11379_+ contig_22345 getorf_JV gene 9649 11379 .
## 673 contig_22365_10325_10870_- contig_22365 getorf_JV gene 10325 10870 .
## 674 contig_22365_10956_11165_- contig_22365 getorf_JV gene 10956 11165 .
## 675 contig_22365_11308_16086_- contig_22365 getorf_JV gene 11308 16086 .
## 676 contig_22365_1219_1794_+ contig_22365 getorf_JV gene 1219 1794 .
## 677 contig_22365_16399_16557_- contig_22365 getorf_JV gene 16399 16557 .
## 678 contig_22365_16643_17776_- contig_22365 getorf_JV gene 16643 17776 .
## 679 contig_22365_18040_18654_+ contig_22365 getorf_JV gene 18040 18654 .
## 680 contig_22365_1842_2921_+ contig_22365 getorf_JV gene 1842 2921 .
## 681 contig_22365_18779_19024_- contig_22365 getorf_JV gene 18779 19024 .
## 682 contig_22365_19027_19554_- contig_22365 getorf_JV gene 19027 19554 .
## 683 contig_22365_19642_19848_- contig_22365 getorf_JV gene 19642 19848 .
## 684 contig_22365_19829_20473_- contig_22365 getorf_JV gene 19829 20473 .
## 685 contig_22365_3229_4116_+ contig_22365 getorf_JV gene 3229 4116 .
## 686 contig_22365_356_1024_- contig_22365 getorf_JV gene 356 1024 .
## 687 contig_22365_4145_4450_+ contig_22365 getorf_JV gene 4145 4450 .
## 688 contig_22365_4548_5465_- contig_22365 getorf_JV gene 4548 5465 .
## 689 contig_22365_5498_6112_- contig_22365 getorf_JV gene 5498 6112 .
## 690 contig_22365_6127_6996_- contig_22365 getorf_JV gene 6127 6996 .
## 691 contig_22365_7069_10212_+ contig_22365 getorf_JV gene 7069 10212 .
## 692 contig_22381_10447_10659_- contig_22381 getorf_JV gene 10447 10659 .
## 693 contig_22381_10655_11848_- contig_22381 getorf_JV gene 10655 11848 .
## 694 contig_22381_12147_13829_- contig_22381 getorf_JV gene 12147 13829 .
## 695 contig_22381_13930_14562_+ contig_22381 getorf_JV gene 13930 14562 .
## 696 contig_22381_14583_15050_- contig_22381 getorf_JV gene 14583 15050 .
## 697 contig_22381_1502_2212_+ contig_22381 getorf_JV gene 1502 2212 .
## 698 contig_22381_15095_15760_+ contig_22381 getorf_JV gene 15095 15760 .
## 699 contig_22381_15756_16229_+ contig_22381 getorf_JV gene 15756 16229 .
## 700 contig_22381_16303_17325_- contig_22381 getorf_JV gene 16303 17325 .
## 701 contig_22381_2291_2440_+ contig_22381 getorf_JV gene 2291 2440 .
## 702 contig_22381_2686_2895_- contig_22381 getorf_JV gene 2686 2895 .
## 703 contig_22381_2915_3178_+ contig_22381 getorf_JV gene 2915 3178 .
## 704 contig_22381_3197_3406_- contig_22381 getorf_JV gene 3197 3406 .
## 705 contig_22381_322_1329_- contig_22381 getorf_JV gene 322 1329 .
## 706 contig_22381_3527_4222_+ contig_22381 getorf_JV gene 3527 4222 .
## 707 contig_22381_4311_9146_+ contig_22381 getorf_JV gene 4311 9146 .
## 708 contig_22381_9132_10343_- contig_22381 getorf_JV gene 9132 10343 .
## 709 contig_22449_11022_11357_+ contig_22449 getorf_JV gene 11022 11357 .
## 710 contig_22449_113_1198_+ contig_22449 getorf_JV gene 113 1198 .
## 711 contig_22449_11417_11641_+ contig_22449 getorf_JV gene 11417 11641 .
## 712 contig_22449_1235_1882_+ contig_22449 getorf_JV gene 1235 1882 .
## 713 contig_22449_1927_5850_- contig_22449 getorf_JV gene 1927 5850 .
## 714 contig_22449_5834_5983_- contig_22449 getorf_JV gene 5834 5983 .
## 715 contig_22449_5967_6521_+ contig_22449 getorf_JV gene 5967 6521 .
## 716 contig_22449_6524_7117_- contig_22449 getorf_JV gene 6524 7117 .
## 717 contig_22449_7138_7470_+ contig_22449 getorf_JV gene 7138 7470 .
## 718 contig_22449_7535_7786_+ contig_22449 getorf_JV gene 7535 7786 .
## 719 contig_22449_7877_8995_+ contig_22449 getorf_JV gene 7877 8995 .
## 720 contig_22449_8985_10409_+ contig_22449 getorf_JV gene 8985 10409 .
## 732 contig_22533_3206_4132_+ contig_22533 getorf_JV gene 3206 4132 .
## 733 contig_22533_383_616_- contig_22533 getorf_JV gene 383 616 .
## 734 contig_22533_4132_5184_+ contig_22533 getorf_JV gene 4132 5184 .
## 735 contig_22533_5193_6800_- contig_22533 getorf_JV gene 5193 6800 .
## 736 contig_22533_7018_7335_+ contig_22533 getorf_JV gene 7018 7335 .
## 737 contig_22533_7389_8687_+ contig_22533 getorf_JV gene 7389 8687 .
## 738 contig_22533_9105_9551_- contig_22533 getorf_JV gene 9105 9551 .
## 739 contig_22533_944_2971_+ contig_22533 getorf_JV gene 944 2971 .
## 740 contig_22533_9620_9844_- contig_22533 getorf_JV gene 9620 9844 .
## 763 contig_22895_2774_3307_+ contig_22895 getorf_JV gene 2774 3307 .
## 764 contig_22895_317_466_+ contig_22895 getorf_JV gene 317 466 .
## 765 contig_22895_3498_3944_+ contig_22895 getorf_JV gene 3498 3944 .
## 766 contig_22895_442_2628_+ contig_22895 getorf_JV gene 442 2628 .
## strand phase attributes seq_length subject_id identity alignment_length
## 132 FALSE 1 150 1450 YP_009345650.1 0.938 49
## 133 FALSE 1 153 1450 YP_009345649.1 1.000 51
## 134 FALSE 1 186 1450 YP_009345648.1 1.000 62
## 197 TRUE 1 207 7285 YP_009345675.1 1.000 69
## 198 FALSE 1 174 7285 YP_009345674.1 1.000 58
## 199 FALSE 1 258 7285 YP_009345673.1 1.000 86
## 200 TRUE 1 1935 7285 YP_009345672.1 1.000 645
## 201 FALSE 1 255 7285 YP_009345678.1 0.825 103
## 202 FALSE 1 366 7285 YP_009345677.1 1.000 122
## 203 TRUE 1 927 7285 YP_009345671.1 1.000 309
## 204 FALSE 1 1512 7285 YP_009345670.1 1.000 504
## 205 FALSE 1 318 7285 YP_009345676.1 1.000 106
## 206 FALSE 1 156 7285 YP_009345669.1 1.000 52
## 246 FALSE 1 1869 7049 YP_009345711.1 0.992 625
## 247 FALSE 1 1185 7049 YP_009345712.1 1.000 395
## 248 TRUE 1 2055 7049 YP_009345710.1 1.000 685
## 249 FALSE 1 744 7049 YP_009345709.1 1.000 248
## 478 TRUE 1 186 1091 YP_009345648.1 1.000 62
## 479 TRUE 1 153 1091 YP_009345649.1 1.000 51
## 649 TRUE 1 1011 24107 YP_009345626.1 1.000 337
## 650 FALSE 1 177 24107 YP_009345616.1 1.000 59
## 651 TRUE 1 2337 24107 YP_009345615.1 1.000 779
## 652 TRUE 1 276 24107 YP_009345614.1 1.000 92
## 653 FALSE 1 318 24107 YP_009345613.1 1.000 106
## 654 FALSE 1 252 24107 YP_009345612.1 1.000 84
## 655 FALSE 1 780 24107 YP_009345611.1 1.000 260
## 656 FALSE 1 549 24107 YP_009345610.1 1.000 183
## 657 FALSE 1 447 24107 YP_009345628.1 1.000 149
## 658 FALSE 1 2085 24107 YP_009345609.1 0.998 696
## 659 TRUE 1 1014 24107 YP_009345608.1 1.000 338
## 660 TRUE 1 828 24107 YP_009345625.1 1.000 276
## 661 FALSE 1 156 24107 YP_009345607.1 1.000 52
## 662 FALSE 1 1002 24107 YP_009345606.1 1.000 334
## 663 FALSE 1 948 24107 YP_009345605.1 1.000 316
## 664 TRUE 1 171 24107 YP_009345630.1 0.791 24
## 665 TRUE 1 1146 24107 YP_009345624.1 1.000 382
## 666 FALSE 1 678 24107 YP_009345623.1 1.000 226
## 667 FALSE 1 699 24107 YP_009345622.1 1.000 233
## 668 FALSE 1 315 24107 YP_009345621.1 1.000 105
## 669 FALSE 1 531 24107 YP_009345619.1 1.000 177
## 670 FALSE 1 225 24107 YP_009345627.1 1.000 75
## 671 FALSE 1 897 24107 YP_009345618.1 1.000 299
## 672 TRUE 1 1731 24107 YP_009345617.1 1.000 576
## 673 FALSE 1 546 20889 YP_009345698.1 1.000 182
## 674 FALSE 1 210 20889 YP_009345697.1 1.000 70
## 675 FALSE 1 4779 20889 YP_009345696.1 1.000 1593
## 676 TRUE 1 576 20889 YP_009345707.1 1.000 192
## 677 FALSE 1 159 20889 YP_009345695.1 1.000 53
## 678 FALSE 1 1134 20889 YP_009345694.1 1.000 378
## 679 TRUE 1 615 20889 YP_009345693.1 1.000 205
## 680 TRUE 1 1080 20889 YP_009345706.1 1.000 360
## 681 FALSE 1 246 20889 YP_009345692.1 1.000 82
## 682 FALSE 1 528 20889 YP_009345691.1 1.000 176
## 683 FALSE 1 207 20889 YP_009345690.1 1.000 69
## 684 FALSE 1 645 20889 YP_009345689.1 1.000 215
## 685 TRUE 1 888 20889 YP_009345705.1 1.000 296
## 686 FALSE 1 669 20889 YP_009345708.1 1.000 223
## 687 TRUE 1 306 20889 YP_009345704.1 1.000 102
## 688 FALSE 1 918 20889 YP_009345703.1 1.000 306
## 689 FALSE 1 615 20889 YP_009345702.1 1.000 205
## 690 FALSE 1 870 20889 YP_009345701.1 1.000 290
## 691 TRUE 1 3144 20889 YP_009345700.1 1.000 1048
## 692 FALSE 1 213 17653 YP_009345638.1 1.000 71
## 693 FALSE 1 1194 17653 YP_009345637.1 1.000 398
## 694 FALSE 1 1683 17653 YP_009345636.1 0.998 561
## 695 TRUE 1 633 17653 YP_009345635.1 1.000 211
## 696 FALSE 1 468 17653 YP_009345634.1 1.000 156
## 697 TRUE 1 711 17653 YP_009345646.1 1.000 237
## 698 TRUE 1 666 17653 YP_009345633.1 1.000 222
## 699 TRUE 1 474 17653 YP_009345632.1 1.000 158
## 700 FALSE 1 1023 17653 YP_009345631.1 1.000 341
## 701 TRUE 1 150 17653 YP_009345645.1 1.000 50
## 702 FALSE 1 210 17653 YP_009345644.1 1.000 70
## 703 TRUE 1 264 17653 YP_009345643.1 0.988 88
## 704 FALSE 1 210 17653 YP_009345642.1 1.000 70
## 705 FALSE 1 1008 17653 YP_009345647.1 1.000 336
## 706 TRUE 1 696 17653 YP_009345641.1 1.000 232
## 707 TRUE 1 4836 17653 YP_009345640.1 0.999 1612
## 708 FALSE 1 1212 17653 YP_009345639.1 1.000 404
## 709 TRUE 1 336 11641 YP_009345654.1 1.000 112
## 710 TRUE 1 1086 11641 YP_009345664.1 1.000 362
## 711 TRUE 1 225 11641 YP_009345653.1 1.000 75
## 712 TRUE 1 648 11641 YP_009345663.1 1.000 216
## 713 FALSE 1 3924 11641 YP_009345662.1 1.000 1308
## 714 FALSE 1 150 11641 YP_009345661.1 1.000 50
## 715 TRUE 1 555 11641 YP_009345660.1 1.000 185
## 716 FALSE 1 594 11641 YP_009345659.1 1.000 198
## 717 TRUE 1 333 11641 YP_009345658.1 1.000 111
## 718 TRUE 1 252 11641 YP_009345657.1 1.000 84
## 719 TRUE 1 1119 11641 YP_009345656.1 1.000 373
## 720 TRUE 1 1425 11641 YP_009345655.1 0.951 499
## 732 TRUE 1 927 9985 YP_009345683.1 1.000 309
## 733 FALSE 1 234 9985 YP_009345681.1 1.000 78
## 734 TRUE 1 1053 9985 YP_009345684.1 1.000 351
## 735 FALSE 1 1608 9985 YP_009345685.1 1.000 536
## 736 TRUE 1 318 9985 YP_009345686.1 1.000 106
## 737 TRUE 1 1299 9985 YP_009345687.1 1.000 433
## 738 FALSE 1 447 9985 YP_009345628.1 1.000 149
## 739 TRUE 1 2028 9985 YP_009345682.1 1.000 676
## 740 FALSE 1 225 9985 YP_009345627.1 1.000 75
## 763 TRUE 1 534 3946 YP_009345666.1 1.000 178
## 764 TRUE 1 150 3946 YP_009345668.1 1.000 50
## 765 TRUE 1 447 3946 YP_009345665.1 1.000 149
## 766 TRUE 1 2187 3946 YP_009345667.1 0.998 729
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 132 3 0 1 49 1 49 2.198e-22 92
## 133 0 0 1 51 1 51 2.328e-26 104
## 134 0 0 1 62 1 62 1.779e-35 130
## 197 0 0 1 69 1 69 2.387e-38 139
## 198 0 0 1 58 1 58 6.934e-31 117
## 199 0 0 1 86 1 86 2.175e-49 172
## 200 0 0 1 645 1 645 0.000e+00 1326
## 201 15 0 1 85 1 103 2.046e-38 140
## 202 0 0 1 122 1 122 1.196e-72 241
## 203 0 0 1 309 1 309 4.807e-203 628
## 204 0 0 1 504 1 504 0.000e+00 1092
## 205 0 0 1 106 1 106 2.906e-66 222
## 206 0 0 1 52 1 52 4.308e-29 111
## 246 5 0 1 623 1 625 0.000e+00 1234
## 247 0 0 1 395 1 395 1.306e-236 730
## 248 0 0 1 685 1 685 0.000e+00 1406
## 249 0 0 1 248 1 248 6.623e-164 511
## 478 0 0 1 62 1 62 1.779e-35 130
## 479 0 0 1 51 1 51 2.328e-26 104
## 649 0 0 1 337 1 337 2.746e-213 659
## 650 0 0 1 59 1 59 9.222e-30 114
## 651 0 0 1 779 1 779 0.000e+00 1557
## 652 0 0 1 92 1 92 1.679e-57 196
## 653 0 0 1 106 1 106 6.555e-61 206
## 654 0 0 1 84 1 84 6.745e-45 159
## 655 0 0 1 260 1 260 3.213e-167 522
## 656 0 0 1 183 1 183 1.607e-105 339
## 657 0 0 1 149 1 149 7.276e-98 315
## 658 1 0 1 695 1 696 0.000e+00 1358
## 659 0 0 1 338 1 338 4.436e-224 690
## 660 0 0 1 276 1 276 7.814e-175 544
## 661 0 0 1 52 1 52 9.466e-27 105
## 662 0 0 1 334 1 334 5.990e-216 666
## 663 0 0 1 316 1 316 1.406e-213 659
## 664 5 0 23 46 39 62 4.492e-05 43
## 665 0 0 1 382 1 382 1.715e-258 792
## 666 0 0 1 226 1 226 3.943e-143 450
## 667 0 0 1 233 1 233 7.519e-143 450
## 668 0 0 1 105 1 105 3.630e-62 210
## 669 0 0 1 177 1 177 1.009e-93 305
## 670 0 0 1 75 1 75 2.350e-38 139
## 671 0 0 1 299 1 299 1.230e-197 612
## 672 0 0 1 576 1 576 0.000e+00 1163
## 673 0 0 1 182 1 182 1.141e-122 389
## 674 0 0 1 70 1 70 8.911e-38 138
## 675 0 0 1 1593 1 1593 0.000e+00 3186
## 676 0 0 1 192 1 192 1.111e-120 383
## 677 0 0 1 53 1 53 3.547e-26 103
## 678 0 0 1 378 1 378 3.323e-244 751
## 679 0 0 1 205 1 205 4.974e-126 400
## 680 0 0 1 360 1 360 1.576e-245 753
## 681 0 0 1 82 1 82 6.666e-46 162
## 682 0 0 1 176 1 176 1.115e-108 348
## 683 0 0 1 69 1 69 1.380e-39 143
## 684 0 0 1 215 1 215 2.196e-139 439
## 685 0 0 1 296 1 296 2.457e-186 579
## 686 0 0 1 223 1 223 3.763e-144 453
## 687 0 0 1 102 1 102 6.453e-63 212
## 688 0 0 1 306 1 306 1.280e-194 603
## 689 0 0 1 205 1 205 3.866e-130 411
## 690 0 0 1 290 1 290 3.392e-189 587
## 691 0 0 1 1048 1 1048 0.000e+00 2079
## 692 0 0 1 71 1 71 7.899e-36 132
## 693 0 0 1 398 1 398 9.802e-265 811
## 694 1 0 1 561 1 561 0.000e+00 1134
## 695 0 0 1 211 1 211 1.410e-133 422
## 696 0 0 1 156 1 156 1.533e-98 317
## 697 0 0 1 237 1 237 3.203e-142 448
## 698 0 0 1 222 1 222 1.080e-138 437
## 699 0 0 1 158 1 158 8.791e-92 298
## 700 0 0 1 341 1 341 6.779e-240 736
## 701 0 0 1 50 1 50 1.080e-25 102
## 702 0 0 1 70 1 70 1.640e-33 125
## 703 1 0 1 88 1 88 1.231e-45 161
## 704 0 0 1 70 1 70 1.679e-37 137
## 705 0 0 1 336 1 336 2.054e-217 671
## 706 0 0 1 232 1 232 7.006e-156 487
## 707 2 0 1 1612 1 1612 0.000e+00 3178
## 708 0 0 1 404 1 404 2.685e-265 813
## 709 0 0 1 112 1 112 2.482e-68 228
## 710 0 0 1 362 1 362 1.094e-237 731
## 711 0 0 1 75 1 75 1.284e-42 152
## 712 0 0 1 216 1 216 7.395e-138 434
## 713 0 0 1 1308 1 1308 0.000e+00 2644
## 714 0 0 1 50 1 50 3.574e-28 109
## 715 0 0 1 185 1 185 6.417e-112 358
## 716 0 0 1 198 1 198 2.604e-126 400
## 717 0 0 1 111 1 111 1.437e-59 203
## 718 0 0 1 84 1 84 9.579e-49 170
## 719 0 0 1 373 1 373 6.608e-247 758
## 720 23 0 1 475 1 499 2.161e-300 918
## 732 0 0 1 309 1 309 1.881e-190 591
## 733 0 0 1 78 1 78 5.174e-49 170
## 734 0 0 1 351 1 351 2.875e-223 689
## 735 0 0 1 536 1 536 0.000e+00 1064
## 736 0 0 1 106 1 106 1.233e-67 226
## 737 0 0 1 433 1 433 9.237e-280 856
## 738 0 0 1 149 1 149 7.276e-98 315
## 739 0 0 1 676 1 676 0.000e+00 1362
## 740 0 0 1 75 1 75 2.350e-38 139
## 763 0 0 1 178 1 178 4.563e-122 387
## 764 0 0 1 50 1 50 1.271e-27 107
## 765 0 0 1 149 1 149 1.179e-92 300
## 766 1 0 1 729 1 729 0.000e+00 1454
## annotation
## 132 hypothetical protein LbFV_ORF46 [Leptopilina boulardi filamentous virus]
## 133 hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 134 hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 197 hypothetical protein LbFV_ORF71 [Leptopilina boulardi filamentous virus]
## 198 hypothetical protein LbFV_ORF70 [Leptopilina boulardi filamentous virus]
## 199 hypothetical protein LbFV_ORF69 [Leptopilina boulardi filamentous virus]
## 200 hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 201 hypothetical protein LbFV_ORF74 [Leptopilina boulardi filamentous virus]
## 202 hypothetical protein LbFV_ORF73 [Leptopilina boulardi filamentous virus]
## 203 hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 204 putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 205 hypothetical protein LbFV_ORF72 [Leptopilina boulardi filamentous virus]
## 206 hypothetical protein LbFV_ORF65 [Leptopilina boulardi filamentous virus]
## 246 hypothetical protein LbFV_ORF107 [Leptopilina boulardi filamentous virus]
## 247 hypothetical protein LbFV_ORF108 [Leptopilina boulardi filamentous virus]
## 248 putative ODV protein [Leptopilina boulardi filamentous virus]
## 249 hypothetical protein LbFV_ORF105 [Leptopilina boulardi filamentous virus]
## 478 hypothetical protein LbFV_ORF44 [Leptopilina boulardi filamentous virus]
## 479 hypothetical protein LbFV_ORF45 [Leptopilina boulardi filamentous virus]
## 649 hypothetical protein LbFV_ORF22 [Leptopilina boulardi filamentous virus]
## 650 hypothetical protein LbFV_ORF12 [Leptopilina boulardi filamentous virus]
## 651 JmJC domain protein [Leptopilina boulardi filamentous virus]
## 652 hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 653 hypothetical protein LbFV_ORF9 [Leptopilina boulardi filamentous virus]
## 654 hypothetical protein LbFV_ORF8 [Leptopilina boulardi filamentous virus]
## 655 hypothetical protein LbFV_ORF7 [Leptopilina boulardi filamentous virus]
## 656 hypothetical protein LbFV_ORF6 [Leptopilina boulardi filamentous virus]
## 657 hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 658 hypothetical protein LbFV_ORF5 [Leptopilina boulardi filamentous virus]
## 659 hypothetical protein LbFV_ORF4 [Leptopilina boulardi filamentous virus]
## 660 hypothetical protein LbFV_ORF21 [Leptopilina boulardi filamentous virus]
## 661 hypothetical protein LbFV_ORF3 [Leptopilina boulardi filamentous virus]
## 662 hypothetical protein LbFV_ORF2 [Leptopilina boulardi filamentous virus]
## 663 hypothetical protein LbFV_ORF1 [Leptopilina boulardi filamentous virus]
## 664 hypothetical protein LbFV_ORF26 [Leptopilina boulardi filamentous virus]
## 665 hypothetical protein LbFV_ORF20 [Leptopilina boulardi filamentous virus]
## 666 hypothetical protein LbFV_ORF19 [Leptopilina boulardi filamentous virus]
## 667 hypothetical protein LbFV_ORF18 [Leptopilina boulardi filamentous virus]
## 668 hypothetical protein LbFV_ORF17 [Leptopilina boulardi filamentous virus]
## 669 hypothetical protein LbFV_ORF15 [Leptopilina boulardi filamentous virus]
## 670 hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 671 BRO family [Leptopilina boulardi filamentous virus]
## 672 JmJC domain protein [Leptopilina boulardi filamentous virus]
## 673 hypothetical protein LbFV_ORF94 [Leptopilina boulardi filamentous virus]
## 674 hypothetical protein LbFV_ORF93 [Leptopilina boulardi filamentous virus]
## 675 hypothetical protein LbFV_ORF92 [Leptopilina boulardi filamentous virus]
## 676 hypothetical protein LbFV_ORF103 [Leptopilina boulardi filamentous virus]
## 677 hypothetical protein LbFV_ORF91 [Leptopilina boulardi filamentous virus]
## 678 hypothetical protein LbFV_ORF90 [Leptopilina boulardi filamentous virus]
## 679 hypothetical protein LbFV_ORF89 [Leptopilina boulardi filamentous virus]
## 680 hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 681 hypothetical protein LbFV_ORF88 [Leptopilina boulardi filamentous virus]
## 682 hypothetical protein LbFV_ORF87 [Leptopilina boulardi filamentous virus]
## 683 hypothetical protein LbFV_ORF86 [Leptopilina boulardi filamentous virus]
## 684 hypothetical protein LbFV_ORF85 [Leptopilina boulardi filamentous virus]
## 685 MSV199 domain protein [Leptopilina boulardi filamentous virus]
## 686 hypothetical protein LbFV_ORF104 [Leptopilina boulardi filamentous virus]
## 687 hypothetical protein LbFV_ORF100 [Leptopilina boulardi filamentous virus]
## 688 hypothetical protein LbFV_ORF99 [Leptopilina boulardi filamentous virus]
## 689 hypothetical protein LbFV_ORF98 [Leptopilina boulardi filamentous virus]
## 690 hypothetical protein LbFV_ORF97 [Leptopilina boulardi filamentous virus]
## 691 hypothetical protein LbFV_ORF96 [Leptopilina boulardi filamentous virus]
## 692 hypothetical protein LbFV_ORF34 [Leptopilina boulardi filamentous virus]
## 693 hypothetical protein LbFV_ORF33 [Leptopilina boulardi filamentous virus]
## 694 hypothetical protein LbFV_ORF32 [Leptopilina boulardi filamentous virus]
## 695 hypothetical protein LbFV_ORF31 [Leptopilina boulardi filamentous virus]
## 696 hypothetical protein LbFV_ORF30 [Leptopilina boulardi filamentous virus]
## 697 hypothetical protein LbFV_ORF42 [Leptopilina boulardi filamentous virus]
## 698 hypothetical protein LbFV_ORF29 [Leptopilina boulardi filamentous virus]
## 699 hypothetical protein LbFV_ORF28 [Leptopilina boulardi filamentous virus]
## 700 putative inhibitor of apoptosis [Leptopilina boulardi filamentous virus]
## 701 hypothetical protein LbFV_ORF41 [Leptopilina boulardi filamentous virus]
## 702 hypothetical protein LbFV_ORF40 [Leptopilina boulardi filamentous virus]
## 703 hypothetical protein LbFV_ORF39 [Leptopilina boulardi filamentous virus]
## 704 hypothetical protein LbFV_ORF38 [Leptopilina boulardi filamentous virus]
## 705 hypothetical protein LbFV_ORF43 [Leptopilina boulardi filamentous virus]
## 706 nudix domain protein [Leptopilina boulardi filamentous virus]
## 707 hypothetical protein LbFV_ORF36 [Leptopilina boulardi filamentous virus]
## 708 hypothetical protein LbFV_ORF35 [Leptopilina boulardi filamentous virus]
## 709 putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 710 putative lecithin:cholesterol acyltransferase [Leptopilina boulardi filamentous virus]
## 711 putative deoxynucleoside kinase [Leptopilina boulardi filamentous virus]
## 712 hypothetical protein LbFV_ORF59 [Leptopilina boulardi filamentous virus]
## 713 putative DNA pol [Leptopilina boulardi filamentous virus]
## 714 hypothetical protein LbFV_ORF57 [Leptopilina boulardi filamentous virus]
## 715 hypothetical protein LbFV_ORF56 [Leptopilina boulardi filamentous virus]
## 716 hypothetical protein LbFV_ORF55 [Leptopilina boulardi filamentous virus]
## 717 hypothetical protein LbFV_ORF54 [Leptopilina boulardi filamentous virus]
## 718 hypothetical protein LbFV_ORF53 [Leptopilina boulardi filamentous virus]
## 719 hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 720 hypothetical protein LbFV_ORF51 [Leptopilina boulardi filamentous virus]
## 732 hypothetical protein LbFV_ORF79 [Leptopilina boulardi filamentous virus]
## 733 hypothetical protein LbFV_ORF77 [Leptopilina boulardi filamentous virus]
## 734 hypothetical protein LbFV_ORF80 [Leptopilina boulardi filamentous virus]
## 735 putative ATPase [Leptopilina boulardi filamentous virus]
## 736 hypothetical protein LbFV_ORF82 [Leptopilina boulardi filamentous virus]
## 737 hypothetical protein LbFV_ORF83 [Leptopilina boulardi filamentous virus]
## 738 hypothetical protein LbFV_ORF24 [Leptopilina boulardi filamentous virus]
## 739 hypothetical protein LbFV_ORF78 [Leptopilina boulardi filamentous virus]
## 740 hypothetical protein LbFV_ORF23 [Leptopilina boulardi filamentous virus]
## 763 hypothetical protein LbFV_ORF62 [Leptopilina boulardi filamentous virus]
## 764 hypothetical protein LbFV_ORF64 [Leptopilina boulardi filamentous virus]
## 765 mucin-like protein [Leptopilina boulardi filamentous virus]
## 766 hypothetical protein LbFV_ORF63 [Leptopilina boulardi filamentous virus]
# LhFV (L.h): names of the contigs attributed to this virus, built by
# prefixing each numeric contig id with "contig_".
contig_set <- paste0(
  "contig_",
  c(9355, 21206, 19696, 3127, 356, 682, 22485, 223, 701, 22588, 2709, 19153)
)
# No unassigned contigs are listed for this virus; NA is passed along as-is.
contig_set_unassigned <- NA

# Keep both sets under the "LhFV_L.h" entry of virus_list so the
# corresponding lines can be merged at a later stage.
virus_list$`LhFV_L.h` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# plot_orfs() is defined elsewhere in this file; it is called here with the
# gff_wga2 table and its result is kept in `res`.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "LhFV"
)
## orf_name seqid source type start end score
## 477 contig_19153_139_1095_- contig_19153 getorf_JV gene 139 1095 .
## 489 contig_19696_103_1077_- contig_19696 getorf_JV gene 103 1077 .
## 564 contig_21206_201_422_- contig_21206 getorf_JV gene 201 422 .
## 565 contig_21206_720_917_+ contig_21206 getorf_JV gene 720 917 .
## 617 contig_223_10243_11955_+ contig_223 getorf_JV gene 10243 11955 .
## 618 contig_223_12586_14052_+ contig_223 getorf_JV gene 12586 14052 .
## 619 contig_223_14076_14873_+ contig_223 getorf_JV gene 14076 14873 .
## 620 contig_223_14890_15255_- contig_223 getorf_JV gene 14890 15255 .
## 621 contig_223_15439_15909_- contig_223 getorf_JV gene 15439 15909 .
## 622 contig_223_15905_16648_- contig_223 getorf_JV gene 15905 16648 .
## 623 contig_223_16672_17559_- contig_223 getorf_JV gene 16672 17559 .
## 624 contig_223_18129_18755_- contig_223 getorf_JV gene 18129 18755 .
## 625 contig_223_18984_19181_+ contig_223 getorf_JV gene 18984 19181 .
## 626 contig_223_19393_21060_+ contig_223 getorf_JV gene 19393 21060 .
## 627 contig_223_21089_21451_+ contig_223 getorf_JV gene 21089 21451 .
## 628 contig_223_21908_22192_+ contig_223 getorf_JV gene 21908 22192 .
## 629 contig_223_22185_22454_+ contig_223 getorf_JV gene 22185 22454 .
## 630 contig_223_22622_22777_- contig_223 getorf_JV gene 22622 22777 .
## 631 contig_223_22899_23939_- contig_223 getorf_JV gene 22899 23939 .
## 632 contig_223_2316_4085_+ contig_223 getorf_JV gene 2316 4085 .
## 633 contig_223_24326_24619_- contig_223 getorf_JV gene 24326 24619 .
## 634 contig_223_24658_25089_- contig_223 getorf_JV gene 24658 25089 .
## 635 contig_223_25085_25825_- contig_223 getorf_JV gene 25085 25825 .
## 636 contig_223_26022_26183_- contig_223 getorf_JV gene 26022 26183 .
## 637 contig_223_26330_26608_- contig_223 getorf_JV gene 26330 26608 .
## 638 contig_223_26647_28743_+ contig_223 getorf_JV gene 26647 28743 .
## 639 contig_223_28886_29104_+ contig_223 getorf_JV gene 28886 29104 .
## 640 contig_223_4284_4457_+ contig_223 getorf_JV gene 4284 4457 .
## 641 contig_223_4474_4782_+ contig_223 getorf_JV gene 4474 4782 .
## 642 contig_223_4766_5794_+ contig_223 getorf_JV gene 4766 5794 .
## 643 contig_223_5855_7516_+ contig_223 getorf_JV gene 5855 7516 .
## 644 contig_223_7581_8423_- contig_223 getorf_JV gene 7581 8423 .
## 645 contig_223_8456_8947_- contig_223 getorf_JV gene 8456 8947 .
## 646 contig_223_85_2166_- contig_223 getorf_JV gene 85 2166 .
## 647 contig_223_8954_9118_- contig_223 getorf_JV gene 8954 9118 .
## 648 contig_223_9219_10037_+ contig_223 getorf_JV gene 9219 10037 .
## 721 contig_22485_10256_10753_+ contig_22485 getorf_JV gene 10256 10753 .
## 722 contig_22485_1672_1980_+ contig_22485 getorf_JV gene 1672 1980 .
## 723 contig_22485_2_235_+ contig_22485 getorf_JV gene 2 235 .
## 724 contig_22485_2089_3477_+ contig_22485 getorf_JV gene 2089 3477 .
## 725 contig_22485_331_1332_+ contig_22485 getorf_JV gene 331 1332 .
## 726 contig_22485_3511_6681_- contig_22485 getorf_JV gene 3511 6681 .
## 727 contig_22485_6697_7524_+ contig_22485 getorf_JV gene 6697 7524 .
## 728 contig_22485_7546_8424_+ contig_22485 getorf_JV gene 7546 8424 .
## 729 contig_22485_8462_9043_+ contig_22485 getorf_JV gene 8462 9043 .
## 730 contig_22485_9131_9835_+ contig_22485 getorf_JV gene 9131 9835 .
## 731 contig_22485_9897_10253_+ contig_22485 getorf_JV gene 9897 10253 .
## 741 contig_22588_1466_2713_+ contig_22588 getorf_JV gene 1466 2713 .
## 742 contig_22588_2922_3827_+ contig_22588 getorf_JV gene 2922 3827 .
## 743 contig_22588_314_511_- contig_22588 getorf_JV gene 314 511 .
## 744 contig_22588_3876_4133_- contig_22588 getorf_JV gene 3876 4133 .
## 745 contig_22588_4337_4621_- contig_22588 getorf_JV gene 4337 4621 .
## 746 contig_22588_4626_5651_- contig_22588 getorf_JV gene 4626 5651 .
## 747 contig_22588_495_713_- contig_22588 getorf_JV gene 495 713 .
## 748 contig_22588_5708_7849_+ contig_22588 getorf_JV gene 5708 7849 .
## 749 contig_22588_713_1453_- contig_22588 getorf_JV gene 713 1453 .
## 816 contig_2709_183_2489_- contig_2709 getorf_JV gene 183 2489 .
## 817 contig_2709_2485_3339_- contig_2709 getorf_JV gene 2485 3339 .
## 818 contig_2709_3512_3682_+ contig_2709 getorf_JV gene 3512 3682 .
## 819 contig_2709_3724_4242_+ contig_2709 getorf_JV gene 3724 4242 .
## 854 contig_3127_1203_2399_+ contig_3127 getorf_JV gene 1203 2399 .
## 855 contig_3127_21_173_- contig_3127 getorf_JV gene 21 173 .
## 856 contig_3127_230_1123_+ contig_3127 getorf_JV gene 230 1123 .
## 857 contig_3127_2426_2989_+ contig_3127 getorf_JV gene 2426 2989 .
## 858 contig_3127_3036_3938_+ contig_3127 getorf_JV gene 3036 3938 .
## 893 contig_356_11400_11675_+ contig_356 getorf_JV gene 11400 11675 .
## 894 contig_356_11866_12045_+ contig_356 getorf_JV gene 11866 12045 .
## 895 contig_356_12432_13463_- contig_356 getorf_JV gene 12432 13463 .
## 896 contig_356_129_323_+ contig_356 getorf_JV gene 129 323 .
## 897 contig_356_13522_18744_- contig_356 getorf_JV gene 13522 18744 .
## 898 contig_356_1416_2081_+ contig_356 getorf_JV gene 1416 2081 .
## 899 contig_356_18732_18896_- contig_356 getorf_JV gene 18732 18896 .
## 900 contig_356_19208_20314_+ contig_356 getorf_JV gene 19208 20314 .
## 901 contig_356_20314_20475_- contig_356 getorf_JV gene 20314 20475 .
## 902 contig_356_2116_6132_+ contig_356 getorf_JV gene 2116 6132 .
## 903 contig_356_375_1388_- contig_356 getorf_JV gene 375 1388 .
## 904 contig_356_6373_8277_+ contig_356 getorf_JV gene 6373 8277 .
## 905 contig_356_8294_9499_+ contig_356 getorf_JV gene 8294 9499 .
## 906 contig_356_9543_11363_- contig_356 getorf_JV gene 9543 11363 .
## 1054 contig_682_10487_11527_+ contig_682 getorf_JV gene 10487 11527 .
## 1055 contig_682_11514_12587_- contig_682 getorf_JV gene 11514 12587 .
## 1056 contig_682_143_523_- contig_682 getorf_JV gene 143 523 .
## 1057 contig_682_1632_3863_+ contig_682 getorf_JV gene 1632 3863 .
## 1058 contig_682_4002_4976_- contig_682 getorf_JV gene 4002 4976 .
## 1059 contig_682_5074_6042_- contig_682 getorf_JV gene 5074 6042 .
## 1060 contig_682_553_1530_+ contig_682 getorf_JV gene 553 1530 .
## 1061 contig_682_6051_6554_- contig_682 getorf_JV gene 6051 6554 .
## 1062 contig_682_6594_6875_- contig_682 getorf_JV gene 6594 6875 .
## 1063 contig_682_6900_7913_- contig_682 getorf_JV gene 6900 7913 .
## 1064 contig_682_8085_8537_+ contig_682 getorf_JV gene 8085 8537 .
## 1065 contig_682_8614_9309_- contig_682 getorf_JV gene 8614 9309 .
## 1066 contig_682_9399_9851_+ contig_682 getorf_JV gene 9399 9851 .
## 1067 contig_682_9856_10461_+ contig_682 getorf_JV gene 9856 10461 .
## 1068 contig_701_10875_11123_- contig_701 getorf_JV gene 10875 11123 .
## 1069 contig_701_11309_12256_+ contig_701 getorf_JV gene 11309 12256 .
## 1070 contig_701_577_804_- contig_701 getorf_JV gene 577 804 .
## 1071 contig_701_6141_6950_- contig_701 getorf_JV gene 6141 6950 .
## 1072 contig_701_7408_7818_- contig_701 getorf_JV gene 7408 7818 .
## 1073 contig_701_7954_9372_- contig_701 getorf_JV gene 7954 9372 .
## 1074 contig_701_800_6073_- contig_701 getorf_JV gene 800 6073 .
## 1075 contig_701_9410_9571_- contig_701 getorf_JV gene 9410 9571 .
## 1076 contig_701_9683_10882_- contig_701 getorf_JV gene 9683 10882 .
## 1253 contig_9355_1174_1404_- contig_9355 getorf_JV gene 1174 1404 .
## 1254 contig_9355_1524_1712_- contig_9355 getorf_JV gene 1524 1712 .
## 1255 contig_9355_190_942_- contig_9355 getorf_JV gene 190 942 .
## strand phase attributes seq_length subject_id identity
## 477 FALSE 1 957 1096 QKN22475.1 0.965
## 489 FALSE 1 975 1079 QKN22518.1 0.993
## 564 FALSE 1 222 1032 <NA> NA
## 565 TRUE 1 198 1032 <NA> NA
## 617 TRUE 1 1713 29250 QKN22474.1 0.942
## 618 TRUE 1 1467 29250 QKN22475.1 0.946
## 619 TRUE 1 798 29250 QKN22476.1 0.943
## 620 FALSE 1 366 29250 QKN22472.1 0.975
## 621 FALSE 1 471 29250 QKN22471.1 0.979
## 622 FALSE 1 744 29250 QKN22470.1 0.951
## 623 FALSE 1 888 29250 QKN22469.1 0.972
## 624 FALSE 1 627 29250 QKN22467.1 0.980
## 625 TRUE 1 198 29250 <NA> NA
## 626 TRUE 1 1668 29250 QKN22477.1 0.982
## 627 TRUE 1 363 29250 <NA> NA
## 628 TRUE 1 285 29250 YP_009345614.1 0.425
## 629 TRUE 1 270 29250 <NA> NA
## 630 FALSE 1 156 29250 <NA> NA
## 631 FALSE 1 1041 29250 QKN22465.1 0.974
## 632 TRUE 1 1770 29250 QKN22521.1 0.993
## 633 FALSE 1 294 29250 <NA> NA
## 634 FALSE 1 432 29250 <NA> NA
## 635 FALSE 1 741 29250 <NA> NA
## 636 FALSE 1 162 29250 <NA> NA
## 637 FALSE 1 279 29250 <NA> NA
## 638 TRUE 1 2097 29250 YP_009345672.1 0.349
## 639 TRUE 1 219 29250 QKN22507.1 0.731
## 640 TRUE 1 174 29250 <NA> NA
## 641 TRUE 1 309 29250 <NA> NA
## 642 TRUE 1 1029 29250 <NA> NA
## 643 TRUE 1 1662 29250 YP_009345685.1 0.346
## 644 FALSE 1 843 29250 YP_009345671.1 0.333
## 645 FALSE 1 492 29250 <NA> NA
## 646 FALSE 1 2082 29250 QKN22522.1 0.985
## 647 FALSE 1 165 29250 <NA> NA
## 648 TRUE 1 819 29250 QKN22473.1 0.995
## 721 TRUE 1 498 10753 QKN22515.1 1.000
## 722 TRUE 1 309 10753 QKN22501.1 1.000
## 723 TRUE 1 234 10753 QKN22499.1 0.853
## 724 TRUE 1 1389 10753 QKN22502.1 0.989
## 725 TRUE 1 1002 10753 QKN22500.1 0.991
## 726 FALSE 1 3171 10753 QKN22516.1 0.982
## 727 TRUE 1 828 10753 QKN22510.1 0.960
## 728 TRUE 1 879 10753 QKN22511.1 0.993
## 729 TRUE 1 582 10753 QKN22512.1 0.989
## 730 TRUE 1 705 10753 QKN22513.1 0.987
## 731 TRUE 1 357 10753 QKN22514.1 0.957
## 741 TRUE 1 1248 7997 QKN22504.1 0.987
## 742 TRUE 1 906 7997 QKN22505.1 1.000
## 743 FALSE 1 198 7997 <NA> NA
## 744 FALSE 1 258 7997 <NA> NA
## 745 FALSE 1 285 7997 <NA> NA
## 746 FALSE 1 1026 7997 QKN22508.1 0.979
## 747 FALSE 1 219 7997 <NA> NA
## 748 TRUE 1 2142 7997 QKN22506.1 0.976
## 749 FALSE 1 741 7997 QKN22509.1 0.995
## 816 FALSE 1 2307 4458 QKN22456.1 0.990
## 817 FALSE 1 855 4458 QKN22455.1 0.989
## 818 TRUE 1 171 4458 <NA> NA
## 819 TRUE 1 519 4458 XP_002083176.1 0.396
## 854 TRUE 1 1197 3938 QKN22497.1 0.992
## 855 FALSE 1 153 3938 <NA> NA
## 856 TRUE 1 894 3938 QKN22496.1 0.993
## 857 TRUE 1 564 3938 QKN22498.1 0.984
## 858 TRUE 1 903 3938 QKN22499.1 0.858
## 893 TRUE 1 276 20719 <NA> NA
## 894 TRUE 1 180 20719 <NA> NA
## 895 FALSE 1 1032 20719 QKN22479.1 0.916
## 896 TRUE 1 195 20719 <NA> NA
## 897 FALSE 1 5223 20719 QKN22478.1 0.977
## 898 TRUE 1 666 20719 QKN22517.1 0.977
## 899 FALSE 1 165 20719 <NA> NA
## 900 TRUE 1 1107 20719 QKN22483.1 0.981
## 901 FALSE 1 162 20719 <NA> NA
## 902 TRUE 1 4017 20719 QKN22518.1 0.878
## 903 FALSE 1 1014 20719 QKN22519.1 0.991
## 904 TRUE 1 1905 20719 <NA> NA
## 905 TRUE 1 1206 20719 YP_009345656.1 0.289
## 906 FALSE 1 1821 20719 QKN22481.1 0.978
## 1054 TRUE 1 1041 12802 YP_009345706.1 0.296
## 1055 FALSE 1 1074 12802 <NA> NA
## 1056 FALSE 1 381 12802 <NA> NA
## 1057 TRUE 1 2232 12802 QKN22485.1 0.979
## 1058 FALSE 1 975 12802 QKN22492.1 0.933
## 1059 FALSE 1 969 12802 QKN22491.1 0.987
## 1060 TRUE 1 978 12802 QKN22484.1 0.957
## 1061 FALSE 1 504 12802 QKN22490.1 1.000
## 1062 FALSE 1 282 12802 <NA> NA
## 1063 FALSE 1 1014 12802 QKN22489.1 0.970
## 1064 TRUE 1 453 12802 QKN22487.1 0.993
## 1065 FALSE 1 696 12802 QKN22488.1 0.990
## 1066 TRUE 1 453 12802 <NA> NA
## 1067 TRUE 1 606 12802 <NA> NA
## 1068 FALSE 1 249 12482 <NA> NA
## 1069 TRUE 1 948 12482 QKN22463.1 0.958
## 1070 FALSE 1 228 12482 <NA> NA
## 1071 FALSE 1 810 12482 QKN22460.1 0.977
## 1072 FALSE 1 411 12482 QKN22459.1 0.978
## 1073 FALSE 1 1419 12482 QKN22458.1 0.987
## 1074 FALSE 1 5274 12482 QKN22461.1 0.977
## 1075 FALSE 1 162 12482 <NA> NA
## 1076 FALSE 1 1200 12482 QKN22457.1 0.990
## 1253 FALSE 1 231 1744 QKN22523.1 0.680
## 1254 FALSE 1 189 1744 QKN22523.1 0.677
## 1255 FALSE 1 753 1744 QKN22523.1 0.988
## alignment_length mismatches gap_opens qstart qend sstart send
## 477 319 11 0 1 319 161 473
## 489 295 2 0 1 295 959 1252
## 564 NA NA NA NA NA NA NA
## 565 NA NA NA NA NA NA NA
## 617 571 33 0 1 571 1 570
## 618 489 25 0 1 489 1 473
## 619 266 15 0 1 266 1 259
## 620 122 3 0 1 122 1 122
## 621 146 3 0 8 153 2 147
## 622 248 12 0 1 248 1 238
## 623 296 8 0 1 296 1 291
## 624 209 4 0 1 209 1 207
## 625 NA NA NA NA NA NA NA
## 626 556 10 0 1 556 1 556
## 627 NA NA NA NA NA NA NA
## 628 47 26 0 41 87 38 84
## 629 NA NA NA NA NA NA NA
## 630 NA NA NA NA NA NA NA
## 631 347 9 0 1 347 1 346
## 632 589 4 0 1 589 1 588
## 633 NA NA NA NA NA NA NA
## 634 NA NA NA NA NA NA NA
## 635 NA NA NA NA NA NA NA
## 636 NA NA NA NA NA NA NA
## 637 NA NA NA NA NA NA NA
## 638 615 384 0 83 697 52 642
## 639 67 16 0 7 68 35 101
## 640 NA NA NA NA NA NA NA
## 641 NA NA NA NA NA NA NA
## 642 NA NA NA NA NA NA NA
## 643 457 297 0 92 548 78 532
## 644 287 185 0 3 281 13 299
## 645 NA NA NA NA NA NA NA
## 646 694 10 0 1 694 1 693
## 647 NA NA NA NA NA NA NA
## 648 244 1 0 30 273 1 244
## 721 166 0 0 1 166 1 166
## 722 102 0 0 1 102 1 102
## 723 73 10 0 6 78 314 385
## 724 463 5 0 1 463 1 462
## 725 334 3 0 1 334 1 334
## 726 704 13 0 1 704 30 728
## 727 276 11 0 1 276 1 269
## 728 292 2 0 1 292 1 292
## 729 194 2 0 1 194 1 194
## 730 235 3 0 1 235 1 233
## 731 119 5 0 1 119 1 116
## 741 416 5 0 1 416 1 412
## 742 302 0 0 1 302 1 302
## 743 NA NA NA NA NA NA NA
## 744 NA NA NA NA NA NA NA
## 745 NA NA NA NA NA NA NA
## 746 346 7 0 1 342 1 346
## 747 NA NA NA NA NA NA NA
## 748 714 17 0 1 714 19 727
## 749 247 1 0 1 247 1 247
## 816 769 8 0 1 769 1 769
## 817 285 3 0 1 285 1 285
## 818 NA NA NA NA NA NA NA
## 819 101 60 0 35 134 30 130
## 854 399 3 0 1 399 1 399
## 855 NA NA NA NA NA NA NA
## 856 298 2 0 1 298 1 298
## 857 188 3 0 1 188 1 188
## 858 333 43 0 1 301 1 333
## 893 NA NA NA NA NA NA NA
## 894 NA NA NA NA NA NA NA
## 895 349 29 0 1 344 1 349
## 896 NA NA NA NA NA NA NA
## 897 1747 40 0 1 1741 1 1747
## 898 222 5 0 1 222 1 218
## 899 NA NA NA NA NA NA NA
## 900 369 7 0 1 369 1 369
## 901 NA NA NA NA NA NA NA
## 902 1271 153 0 39 1309 1 1252
## 903 338 3 0 1 338 1 338
## 904 NA NA NA NA NA NA NA
## 905 389 247 0 9 397 11 358
## 906 617 13 0 1 607 1 617
## 1054 347 235 0 3 337 5 351
## 1055 NA NA NA NA NA NA NA
## 1056 NA NA NA NA NA NA NA
## 1057 744 15 0 1 744 1 737
## 1058 316 21 0 1 314 1 316
## 1059 323 4 0 1 323 1 320
## 1060 326 14 0 1 326 1 326
## 1061 168 0 0 1 168 1 168
## 1062 NA NA NA NA NA NA NA
## 1063 338 10 0 1 338 1 335
## 1064 151 1 0 1 151 1 151
## 1065 104 1 0 129 232 1 104
## 1066 NA NA NA NA NA NA NA
## 1067 NA NA NA NA NA NA NA
## 1068 NA NA NA NA NA NA NA
## 1069 316 13 0 1 316 1 316
## 1070 NA NA NA NA NA NA NA
## 1071 270 6 0 1 270 1 270
## 1072 137 3 0 1 137 1 137
## 1073 473 6 0 1 473 1 473
## 1074 1762 40 0 1 1758 1 1762
## 1075 NA NA NA NA NA NA NA
## 1076 400 4 0 1 400 1 398
## 1253 73 22 0 1 69 54 126
## 1254 57 17 0 4 58 103 159
## 1255 251 3 0 1 251 1 251
## evalue bitscore
## 477 1.310000e-201 624
## 489 9.019000e-192 596
## 564 NA NA
## 565 NA NA
## 617 2.574082e-321 984
## 618 2.527000e-295 904
## 619 1.383000e-152 480
## 620 4.059000e-75 248
## 621 9.200000e-93 301
## 622 4.351000e-153 480
## 623 1.220000e-182 568
## 624 7.725000e-122 388
## 625 NA NA
## 626 0.000000e+00 1126
## 627 NA NA
## 628 9.676000e-06 47
## 629 NA NA
## 630 NA NA
## 631 2.014000e-218 674
## 632 0.000000e+00 1204
## 633 NA NA
## 634 NA NA
## 635 NA NA
## 636 NA NA
## 637 NA NA
## 638 1.353000e-100 349
## 639 1.973000e-22 94
## 640 NA NA
## 641 NA NA
## 642 NA NA
## 643 6.465000e-67 244
## 644 8.672000e-39 151
## 645 NA NA
## 646 0.000000e+00 1401
## 647 NA NA
## 648 2.505000e-145 459
## 721 6.913000e-109 348
## 722 4.516000e-62 209
## 723 1.145000e-24 100
## 724 1.210000e-303 927
## 725 1.297000e-223 689
## 726 0.000000e+00 1390
## 727 3.561000e-157 494
## 728 1.155000e-196 608
## 729 1.959000e-125 397
## 730 9.427000e-151 473
## 731 6.826000e-58 198
## 741 9.498000e-269 824
## 742 1.090000e-191 595
## 743 NA NA
## 744 NA NA
## 745 NA NA
## 746 1.448000e-217 672
## 747 NA NA
## 748 0.000000e+00 1448
## 749 1.724000e-157 493
## 816 0.000000e+00 1535
## 817 2.571000e-182 567
## 818 NA NA
## 819 1.455000e-21 96
## 854 4.731000e-270 826
## 855 NA NA
## 856 2.112000e-190 591
## 857 5.552000e-119 378
## 858 2.016000e-175 548
## 893 NA NA
## 894 NA NA
## 895 3.993000e-207 642
## 896 NA NA
## 897 0.000000e+00 3397
## 898 1.080000e-138 437
## 899 NA NA
## 900 7.696000e-244 749
## 901 NA NA
## 902 0.000000e+00 2224
## 903 1.898000e-225 694
## 904 NA NA
## 905 7.797000e-40 159
## 906 0.000000e+00 1211
## 1054 5.965000e-32 134
## 1055 NA NA
## 1056 NA NA
## 1057 0.000000e+00 1435
## 1058 6.033000e-162 510
## 1059 7.103000e-197 611
## 1060 5.392000e-209 646
## 1061 1.043000e-108 347
## 1062 NA NA
## 1063 1.570000e-210 651
## 1064 2.352000e-95 308
## 1065 2.531000e-58 206
## 1066 NA NA
## 1067 NA NA
## 1068 NA NA
## 1069 3.813000e-207 640
## 1070 NA NA
## 1071 4.182000e-178 554
## 1072 3.896000e-77 255
## 1073 1.345000e-309 945
## 1074 0.000000e+00 3413
## 1075 NA NA
## 1076 9.075000e-260 797
## 1253 4.144000e-17 79
## 1254 8.819000e-15 71
## 1255 2.666000e-153 481
## annotation
## 477 putative protein 21 [Drosophila-associated filamentous virus]
## 489 putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 564 <NA>
## 565 <NA>
## 617 putative protein 20 [Drosophila-associated filamentous virus]
## 618 putative protein 21 [Drosophila-associated filamentous virus]
## 619 putative protein 22 [Drosophila-associated filamentous virus]
## 620 putative protein 18 [Drosophila-associated filamentous virus]
## 621 putative protein 17 [Drosophila-associated filamentous virus]
## 622 putative ORF19 [Drosophila-associated filamentous virus]
## 623 putative protein 15 [Drosophila-associated filamentous virus]
## 624 putative protein 13 [Drosophila-associated filamentous virus]
## 625 <NA>
## 626 putative JmJC domain protein [Drosophila-associated filamentous virus]
## 627 <NA>
## 628 hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 629 <NA>
## 630 <NA>
## 631 putative protein 11 [Drosophila-associated filamentous virus]
## 632 putative ORF107 [Drosophila-associated filamentous virus]
## 633 <NA>
## 634 <NA>
## 635 <NA>
## 636 <NA>
## 637 <NA>
## 638 hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 639 putative protein 53 [Drosophila-associated filamentous virus]
## 640 <NA>
## 641 <NA>
## 642 <NA>
## 643 putative ATPase [Leptopilina boulardi filamentous virus]
## 644 hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 645 <NA>
## 646 putative protein 65 [Drosophila-associated filamentous virus]
## 647 <NA>
## 648 putative protein 19 [Drosophila-associated filamentous virus]
## 721 putative protein 61 [Drosophila-associated filamentous virus]
## 722 putative protein 47 [Drosophila-associated filamentous virus]
## 723 putative protein 45 [Drosophila-associated filamentous virus]
## 724 putative protein 48 [Drosophila-associated filamentous virus]
## 725 putative ORF43 [Drosophila-associated filamentous virus]
## 726 putative ORF96, partial [Drosophila-associated filamentous virus]
## 727 putative protein 56 [Drosophila-associated filamentous virus]
## 728 putative protein 57 [Drosophila-associated filamentous virus]
## 729 putative protein 58 [Drosophila-associated filamentous virus]
## 730 putative Ac81-like protein [Drosophila-associated filamentous virus]
## 731 putative protein 60 [Drosophila-associated filamentous virus]
## 741 putative lecithine cholesterol acyltransferase [Drosophila-associated filamentous virus]
## 742 putative protein 51 [Drosophila-associated filamentous virus]
## 743 <NA>
## 744 <NA>
## 745 <NA>
## 746 putative protein 54 [Drosophila-associated filamentous virus]
## 747 <NA>
## 748 PIF1-like protein [Drosophila-associated filamentous virus]
## 749 putative protein 55 [Drosophila-associated filamentous virus]
## 816 P74-like protein [Drosophila-associated filamentous virus]
## 817 putative protein 1 [Drosophila-associated filamentous virus]
## 818 <NA>
## 819 lysozyme X [Drosophila simulans]
## 854 putative ORF20 [Drosophila-associated filamentous virus]
## 855 <NA>
## 856 putative protein 42 [Drosophila-associated filamentous virus]
## 857 putative protein 44 [Drosophila-associated filamentous virus]
## 858 putative protein 45 [Drosophila-associated filamentous virus]
## 893 <NA>
## 894 <NA>
## 895 putative protein 25 [Drosophila-associated filamentous virus]
## 896 <NA>
## 897 PIF2-like protein [Drosophila-associated filamentous virus]
## 898 putative protein 67 [Drosophila-associated filamentous virus]
## 899 <NA>
## 900 putative protein 29 [Drosophila-associated filamentous virus]
## 901 <NA>
## 902 putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 903 putative protein 69 [Drosophila-associated filamentous virus]
## 904 <NA>
## 905 hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 906 putative protein 27 [Drosophila-associated filamentous virus]
## 1054 hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 1055 <NA>
## 1056 <NA>
## 1057 putative ORF5 [Drosophila-associated filamentous virus]
## 1058 putative protein 38 [Drosophila-associated filamentous virus]
## 1059 putative protein 37 [Drosophila-associated filamentous virus]
## 1060 putative protein 30 [Drosophila-associated filamentous virus]
## 1061 putative protein 36 [Drosophila-associated filamentous virus]
## 1062 <NA>
## 1063 putative ORF24 [Drosophila-associated filamentous virus]
## 1064 putative protein 33 [Drosophila-associated filamentous virus]
## 1065 putative protein 34 [Drosophila-associated filamentous virus]
## 1066 <NA>
## 1067 <NA>
## 1068 <NA>
## 1069 putative ORF1 [Drosophila-associated filamentous virus]
## 1070 <NA>
## 1071 putative nudix domain protein [Drosophila-associated filamentous virus]
## 1072 putative protein 5 [Drosophila-associated filamentous virus]
## 1073 putative protein 4 [Drosophila-associated filamentous virus]
## 1074 putative protein 7 [Drosophila-associated filamentous virus]
## 1075 <NA>
## 1076 putative ORF2 [Drosophila-associated filamentous virus]
## 1253 ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1254 ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1255 ODV-E66-like protein [Drosophila-associated filamentous virus]
## Saving 7 x 5 in image
# Auto-print the first element of `res`. Single-bracket indexing keeps it
# wrapped in a length-one list, which is why the output starts with `[[1]]`.
res[1L]
## [[1]]
# Auto-print the second element of `res`, still wrapped in a length-one list.
# In the rendered output this is the per-ORF table (coordinates, strand,
# BLAST hit statistics and annotation).
res[2L]
## [[1]]
## orf_name seqid source type start end score
## 477 contig_19153_139_1095_- contig_19153 getorf_JV gene 139 1095 .
## 489 contig_19696_103_1077_- contig_19696 getorf_JV gene 103 1077 .
## 564 contig_21206_201_422_- contig_21206 getorf_JV gene 201 422 .
## 565 contig_21206_720_917_+ contig_21206 getorf_JV gene 720 917 .
## 617 contig_223_10243_11955_+ contig_223 getorf_JV gene 10243 11955 .
## 618 contig_223_12586_14052_+ contig_223 getorf_JV gene 12586 14052 .
## 619 contig_223_14076_14873_+ contig_223 getorf_JV gene 14076 14873 .
## 620 contig_223_14890_15255_- contig_223 getorf_JV gene 14890 15255 .
## 621 contig_223_15439_15909_- contig_223 getorf_JV gene 15439 15909 .
## 622 contig_223_15905_16648_- contig_223 getorf_JV gene 15905 16648 .
## 623 contig_223_16672_17559_- contig_223 getorf_JV gene 16672 17559 .
## 624 contig_223_18129_18755_- contig_223 getorf_JV gene 18129 18755 .
## 625 contig_223_18984_19181_+ contig_223 getorf_JV gene 18984 19181 .
## 626 contig_223_19393_21060_+ contig_223 getorf_JV gene 19393 21060 .
## 627 contig_223_21089_21451_+ contig_223 getorf_JV gene 21089 21451 .
## 628 contig_223_21908_22192_+ contig_223 getorf_JV gene 21908 22192 .
## 629 contig_223_22185_22454_+ contig_223 getorf_JV gene 22185 22454 .
## 630 contig_223_22622_22777_- contig_223 getorf_JV gene 22622 22777 .
## 631 contig_223_22899_23939_- contig_223 getorf_JV gene 22899 23939 .
## 632 contig_223_2316_4085_+ contig_223 getorf_JV gene 2316 4085 .
## 633 contig_223_24326_24619_- contig_223 getorf_JV gene 24326 24619 .
## 634 contig_223_24658_25089_- contig_223 getorf_JV gene 24658 25089 .
## 635 contig_223_25085_25825_- contig_223 getorf_JV gene 25085 25825 .
## 636 contig_223_26022_26183_- contig_223 getorf_JV gene 26022 26183 .
## 637 contig_223_26330_26608_- contig_223 getorf_JV gene 26330 26608 .
## 638 contig_223_26647_28743_+ contig_223 getorf_JV gene 26647 28743 .
## 639 contig_223_28886_29104_+ contig_223 getorf_JV gene 28886 29104 .
## 640 contig_223_4284_4457_+ contig_223 getorf_JV gene 4284 4457 .
## 641 contig_223_4474_4782_+ contig_223 getorf_JV gene 4474 4782 .
## 642 contig_223_4766_5794_+ contig_223 getorf_JV gene 4766 5794 .
## 643 contig_223_5855_7516_+ contig_223 getorf_JV gene 5855 7516 .
## 644 contig_223_7581_8423_- contig_223 getorf_JV gene 7581 8423 .
## 645 contig_223_8456_8947_- contig_223 getorf_JV gene 8456 8947 .
## 646 contig_223_85_2166_- contig_223 getorf_JV gene 85 2166 .
## 647 contig_223_8954_9118_- contig_223 getorf_JV gene 8954 9118 .
## 648 contig_223_9219_10037_+ contig_223 getorf_JV gene 9219 10037 .
## 721 contig_22485_10256_10753_+ contig_22485 getorf_JV gene 10256 10753 .
## 722 contig_22485_1672_1980_+ contig_22485 getorf_JV gene 1672 1980 .
## 723 contig_22485_2_235_+ contig_22485 getorf_JV gene 2 235 .
## 724 contig_22485_2089_3477_+ contig_22485 getorf_JV gene 2089 3477 .
## 725 contig_22485_331_1332_+ contig_22485 getorf_JV gene 331 1332 .
## 726 contig_22485_3511_6681_- contig_22485 getorf_JV gene 3511 6681 .
## 727 contig_22485_6697_7524_+ contig_22485 getorf_JV gene 6697 7524 .
## 728 contig_22485_7546_8424_+ contig_22485 getorf_JV gene 7546 8424 .
## 729 contig_22485_8462_9043_+ contig_22485 getorf_JV gene 8462 9043 .
## 730 contig_22485_9131_9835_+ contig_22485 getorf_JV gene 9131 9835 .
## 731 contig_22485_9897_10253_+ contig_22485 getorf_JV gene 9897 10253 .
## 741 contig_22588_1466_2713_+ contig_22588 getorf_JV gene 1466 2713 .
## 742 contig_22588_2922_3827_+ contig_22588 getorf_JV gene 2922 3827 .
## 743 contig_22588_314_511_- contig_22588 getorf_JV gene 314 511 .
## 744 contig_22588_3876_4133_- contig_22588 getorf_JV gene 3876 4133 .
## 745 contig_22588_4337_4621_- contig_22588 getorf_JV gene 4337 4621 .
## 746 contig_22588_4626_5651_- contig_22588 getorf_JV gene 4626 5651 .
## 747 contig_22588_495_713_- contig_22588 getorf_JV gene 495 713 .
## 748 contig_22588_5708_7849_+ contig_22588 getorf_JV gene 5708 7849 .
## 749 contig_22588_713_1453_- contig_22588 getorf_JV gene 713 1453 .
## 816 contig_2709_183_2489_- contig_2709 getorf_JV gene 183 2489 .
## 817 contig_2709_2485_3339_- contig_2709 getorf_JV gene 2485 3339 .
## 818 contig_2709_3512_3682_+ contig_2709 getorf_JV gene 3512 3682 .
## 819 contig_2709_3724_4242_+ contig_2709 getorf_JV gene 3724 4242 .
## 854 contig_3127_1203_2399_+ contig_3127 getorf_JV gene 1203 2399 .
## 855 contig_3127_21_173_- contig_3127 getorf_JV gene 21 173 .
## 856 contig_3127_230_1123_+ contig_3127 getorf_JV gene 230 1123 .
## 857 contig_3127_2426_2989_+ contig_3127 getorf_JV gene 2426 2989 .
## 858 contig_3127_3036_3938_+ contig_3127 getorf_JV gene 3036 3938 .
## 893 contig_356_11400_11675_+ contig_356 getorf_JV gene 11400 11675 .
## 894 contig_356_11866_12045_+ contig_356 getorf_JV gene 11866 12045 .
## 895 contig_356_12432_13463_- contig_356 getorf_JV gene 12432 13463 .
## 896 contig_356_129_323_+ contig_356 getorf_JV gene 129 323 .
## 897 contig_356_13522_18744_- contig_356 getorf_JV gene 13522 18744 .
## 898 contig_356_1416_2081_+ contig_356 getorf_JV gene 1416 2081 .
## 899 contig_356_18732_18896_- contig_356 getorf_JV gene 18732 18896 .
## 900 contig_356_19208_20314_+ contig_356 getorf_JV gene 19208 20314 .
## 901 contig_356_20314_20475_- contig_356 getorf_JV gene 20314 20475 .
## 902 contig_356_2116_6132_+ contig_356 getorf_JV gene 2116 6132 .
## 903 contig_356_375_1388_- contig_356 getorf_JV gene 375 1388 .
## 904 contig_356_6373_8277_+ contig_356 getorf_JV gene 6373 8277 .
## 905 contig_356_8294_9499_+ contig_356 getorf_JV gene 8294 9499 .
## 906 contig_356_9543_11363_- contig_356 getorf_JV gene 9543 11363 .
## 1054 contig_682_10487_11527_+ contig_682 getorf_JV gene 10487 11527 .
## 1055 contig_682_11514_12587_- contig_682 getorf_JV gene 11514 12587 .
## 1056 contig_682_143_523_- contig_682 getorf_JV gene 143 523 .
## 1057 contig_682_1632_3863_+ contig_682 getorf_JV gene 1632 3863 .
## 1058 contig_682_4002_4976_- contig_682 getorf_JV gene 4002 4976 .
## 1059 contig_682_5074_6042_- contig_682 getorf_JV gene 5074 6042 .
## 1060 contig_682_553_1530_+ contig_682 getorf_JV gene 553 1530 .
## 1061 contig_682_6051_6554_- contig_682 getorf_JV gene 6051 6554 .
## 1062 contig_682_6594_6875_- contig_682 getorf_JV gene 6594 6875 .
## 1063 contig_682_6900_7913_- contig_682 getorf_JV gene 6900 7913 .
## 1064 contig_682_8085_8537_+ contig_682 getorf_JV gene 8085 8537 .
## 1065 contig_682_8614_9309_- contig_682 getorf_JV gene 8614 9309 .
## 1066 contig_682_9399_9851_+ contig_682 getorf_JV gene 9399 9851 .
## 1067 contig_682_9856_10461_+ contig_682 getorf_JV gene 9856 10461 .
## 1068 contig_701_10875_11123_- contig_701 getorf_JV gene 10875 11123 .
## 1069 contig_701_11309_12256_+ contig_701 getorf_JV gene 11309 12256 .
## 1070 contig_701_577_804_- contig_701 getorf_JV gene 577 804 .
## 1071 contig_701_6141_6950_- contig_701 getorf_JV gene 6141 6950 .
## 1072 contig_701_7408_7818_- contig_701 getorf_JV gene 7408 7818 .
## 1073 contig_701_7954_9372_- contig_701 getorf_JV gene 7954 9372 .
## 1074 contig_701_800_6073_- contig_701 getorf_JV gene 800 6073 .
## 1075 contig_701_9410_9571_- contig_701 getorf_JV gene 9410 9571 .
## 1076 contig_701_9683_10882_- contig_701 getorf_JV gene 9683 10882 .
## 1253 contig_9355_1174_1404_- contig_9355 getorf_JV gene 1174 1404 .
## 1254 contig_9355_1524_1712_- contig_9355 getorf_JV gene 1524 1712 .
## 1255 contig_9355_190_942_- contig_9355 getorf_JV gene 190 942 .
## strand phase attributes seq_length subject_id identity
## 477 FALSE 1 957 1096 QKN22475.1 0.965
## 489 FALSE 1 975 1079 QKN22518.1 0.993
## 564 FALSE 1 222 1032 <NA> NA
## 565 TRUE 1 198 1032 <NA> NA
## 617 TRUE 1 1713 29250 QKN22474.1 0.942
## 618 TRUE 1 1467 29250 QKN22475.1 0.946
## 619 TRUE 1 798 29250 QKN22476.1 0.943
## 620 FALSE 1 366 29250 QKN22472.1 0.975
## 621 FALSE 1 471 29250 QKN22471.1 0.979
## 622 FALSE 1 744 29250 QKN22470.1 0.951
## 623 FALSE 1 888 29250 QKN22469.1 0.972
## 624 FALSE 1 627 29250 QKN22467.1 0.980
## 625 TRUE 1 198 29250 <NA> NA
## 626 TRUE 1 1668 29250 QKN22477.1 0.982
## 627 TRUE 1 363 29250 <NA> NA
## 628 TRUE 1 285 29250 YP_009345614.1 0.425
## 629 TRUE 1 270 29250 <NA> NA
## 630 FALSE 1 156 29250 <NA> NA
## 631 FALSE 1 1041 29250 QKN22465.1 0.974
## 632 TRUE 1 1770 29250 QKN22521.1 0.993
## 633 FALSE 1 294 29250 <NA> NA
## 634 FALSE 1 432 29250 <NA> NA
## 635 FALSE 1 741 29250 <NA> NA
## 636 FALSE 1 162 29250 <NA> NA
## 637 FALSE 1 279 29250 <NA> NA
## 638 TRUE 1 2097 29250 YP_009345672.1 0.349
## 639 TRUE 1 219 29250 QKN22507.1 0.731
## 640 TRUE 1 174 29250 <NA> NA
## 641 TRUE 1 309 29250 <NA> NA
## 642 TRUE 1 1029 29250 <NA> NA
## 643 TRUE 1 1662 29250 YP_009345685.1 0.346
## 644 FALSE 1 843 29250 YP_009345671.1 0.333
## 645 FALSE 1 492 29250 <NA> NA
## 646 FALSE 1 2082 29250 QKN22522.1 0.985
## 647 FALSE 1 165 29250 <NA> NA
## 648 TRUE 1 819 29250 QKN22473.1 0.995
## 721 TRUE 1 498 10753 QKN22515.1 1.000
## 722 TRUE 1 309 10753 QKN22501.1 1.000
## 723 TRUE 1 234 10753 QKN22499.1 0.853
## 724 TRUE 1 1389 10753 QKN22502.1 0.989
## 725 TRUE 1 1002 10753 QKN22500.1 0.991
## 726 FALSE 1 3171 10753 QKN22516.1 0.982
## 727 TRUE 1 828 10753 QKN22510.1 0.960
## 728 TRUE 1 879 10753 QKN22511.1 0.993
## 729 TRUE 1 582 10753 QKN22512.1 0.989
## 730 TRUE 1 705 10753 QKN22513.1 0.987
## 731 TRUE 1 357 10753 QKN22514.1 0.957
## 741 TRUE 1 1248 7997 QKN22504.1 0.987
## 742 TRUE 1 906 7997 QKN22505.1 1.000
## 743 FALSE 1 198 7997 <NA> NA
## 744 FALSE 1 258 7997 <NA> NA
## 745 FALSE 1 285 7997 <NA> NA
## 746 FALSE 1 1026 7997 QKN22508.1 0.979
## 747 FALSE 1 219 7997 <NA> NA
## 748 TRUE 1 2142 7997 QKN22506.1 0.976
## 749 FALSE 1 741 7997 QKN22509.1 0.995
## 816 FALSE 1 2307 4458 QKN22456.1 0.990
## 817 FALSE 1 855 4458 QKN22455.1 0.989
## 818 TRUE 1 171 4458 <NA> NA
## 819 TRUE 1 519 4458 XP_002083176.1 0.396
## 854 TRUE 1 1197 3938 QKN22497.1 0.992
## 855 FALSE 1 153 3938 <NA> NA
## 856 TRUE 1 894 3938 QKN22496.1 0.993
## 857 TRUE 1 564 3938 QKN22498.1 0.984
## 858 TRUE 1 903 3938 QKN22499.1 0.858
## 893 TRUE 1 276 20719 <NA> NA
## 894 TRUE 1 180 20719 <NA> NA
## 895 FALSE 1 1032 20719 QKN22479.1 0.916
## 896 TRUE 1 195 20719 <NA> NA
## 897 FALSE 1 5223 20719 QKN22478.1 0.977
## 898 TRUE 1 666 20719 QKN22517.1 0.977
## 899 FALSE 1 165 20719 <NA> NA
## 900 TRUE 1 1107 20719 QKN22483.1 0.981
## 901 FALSE 1 162 20719 <NA> NA
## 902 TRUE 1 4017 20719 QKN22518.1 0.878
## 903 FALSE 1 1014 20719 QKN22519.1 0.991
## 904 TRUE 1 1905 20719 <NA> NA
## 905 TRUE 1 1206 20719 YP_009345656.1 0.289
## 906 FALSE 1 1821 20719 QKN22481.1 0.978
## 1054 TRUE 1 1041 12802 YP_009345706.1 0.296
## 1055 FALSE 1 1074 12802 <NA> NA
## 1056 FALSE 1 381 12802 <NA> NA
## 1057 TRUE 1 2232 12802 QKN22485.1 0.979
## 1058 FALSE 1 975 12802 QKN22492.1 0.933
## 1059 FALSE 1 969 12802 QKN22491.1 0.987
## 1060 TRUE 1 978 12802 QKN22484.1 0.957
## 1061 FALSE 1 504 12802 QKN22490.1 1.000
## 1062 FALSE 1 282 12802 <NA> NA
## 1063 FALSE 1 1014 12802 QKN22489.1 0.970
## 1064 TRUE 1 453 12802 QKN22487.1 0.993
## 1065 FALSE 1 696 12802 QKN22488.1 0.990
## 1066 TRUE 1 453 12802 <NA> NA
## 1067 TRUE 1 606 12802 <NA> NA
## 1068 FALSE 1 249 12482 <NA> NA
## 1069 TRUE 1 948 12482 QKN22463.1 0.958
## 1070 FALSE 1 228 12482 <NA> NA
## 1071 FALSE 1 810 12482 QKN22460.1 0.977
## 1072 FALSE 1 411 12482 QKN22459.1 0.978
## 1073 FALSE 1 1419 12482 QKN22458.1 0.987
## 1074 FALSE 1 5274 12482 QKN22461.1 0.977
## 1075 FALSE 1 162 12482 <NA> NA
## 1076 FALSE 1 1200 12482 QKN22457.1 0.990
## 1253 FALSE 1 231 1744 QKN22523.1 0.680
## 1254 FALSE 1 189 1744 QKN22523.1 0.677
## 1255 FALSE 1 753 1744 QKN22523.1 0.988
## alignment_length mismatches gap_opens qstart qend sstart send
## 477 319 11 0 1 319 161 473
## 489 295 2 0 1 295 959 1252
## 564 NA NA NA NA NA NA NA
## 565 NA NA NA NA NA NA NA
## 617 571 33 0 1 571 1 570
## 618 489 25 0 1 489 1 473
## 619 266 15 0 1 266 1 259
## 620 122 3 0 1 122 1 122
## 621 146 3 0 8 153 2 147
## 622 248 12 0 1 248 1 238
## 623 296 8 0 1 296 1 291
## 624 209 4 0 1 209 1 207
## 625 NA NA NA NA NA NA NA
## 626 556 10 0 1 556 1 556
## 627 NA NA NA NA NA NA NA
## 628 47 26 0 41 87 38 84
## 629 NA NA NA NA NA NA NA
## 630 NA NA NA NA NA NA NA
## 631 347 9 0 1 347 1 346
## 632 589 4 0 1 589 1 588
## 633 NA NA NA NA NA NA NA
## 634 NA NA NA NA NA NA NA
## 635 NA NA NA NA NA NA NA
## 636 NA NA NA NA NA NA NA
## 637 NA NA NA NA NA NA NA
## 638 615 384 0 83 697 52 642
## 639 67 16 0 7 68 35 101
## 640 NA NA NA NA NA NA NA
## 641 NA NA NA NA NA NA NA
## 642 NA NA NA NA NA NA NA
## 643 457 297 0 92 548 78 532
## 644 287 185 0 3 281 13 299
## 645 NA NA NA NA NA NA NA
## 646 694 10 0 1 694 1 693
## 647 NA NA NA NA NA NA NA
## 648 244 1 0 30 273 1 244
## 721 166 0 0 1 166 1 166
## 722 102 0 0 1 102 1 102
## 723 73 10 0 6 78 314 385
## 724 463 5 0 1 463 1 462
## 725 334 3 0 1 334 1 334
## 726 704 13 0 1 704 30 728
## 727 276 11 0 1 276 1 269
## 728 292 2 0 1 292 1 292
## 729 194 2 0 1 194 1 194
## 730 235 3 0 1 235 1 233
## 731 119 5 0 1 119 1 116
## 741 416 5 0 1 416 1 412
## 742 302 0 0 1 302 1 302
## 743 NA NA NA NA NA NA NA
## 744 NA NA NA NA NA NA NA
## 745 NA NA NA NA NA NA NA
## 746 346 7 0 1 342 1 346
## 747 NA NA NA NA NA NA NA
## 748 714 17 0 1 714 19 727
## 749 247 1 0 1 247 1 247
## 816 769 8 0 1 769 1 769
## 817 285 3 0 1 285 1 285
## 818 NA NA NA NA NA NA NA
## 819 101 60 0 35 134 30 130
## 854 399 3 0 1 399 1 399
## 855 NA NA NA NA NA NA NA
## 856 298 2 0 1 298 1 298
## 857 188 3 0 1 188 1 188
## 858 333 43 0 1 301 1 333
## 893 NA NA NA NA NA NA NA
## 894 NA NA NA NA NA NA NA
## 895 349 29 0 1 344 1 349
## 896 NA NA NA NA NA NA NA
## 897 1747 40 0 1 1741 1 1747
## 898 222 5 0 1 222 1 218
## 899 NA NA NA NA NA NA NA
## 900 369 7 0 1 369 1 369
## 901 NA NA NA NA NA NA NA
## 902 1271 153 0 39 1309 1 1252
## 903 338 3 0 1 338 1 338
## 904 NA NA NA NA NA NA NA
## 905 389 247 0 9 397 11 358
## 906 617 13 0 1 607 1 617
## 1054 347 235 0 3 337 5 351
## 1055 NA NA NA NA NA NA NA
## 1056 NA NA NA NA NA NA NA
## 1057 744 15 0 1 744 1 737
## 1058 316 21 0 1 314 1 316
## 1059 323 4 0 1 323 1 320
## 1060 326 14 0 1 326 1 326
## 1061 168 0 0 1 168 1 168
## 1062 NA NA NA NA NA NA NA
## 1063 338 10 0 1 338 1 335
## 1064 151 1 0 1 151 1 151
## 1065 104 1 0 129 232 1 104
## 1066 NA NA NA NA NA NA NA
## 1067 NA NA NA NA NA NA NA
## 1068 NA NA NA NA NA NA NA
## 1069 316 13 0 1 316 1 316
## 1070 NA NA NA NA NA NA NA
## 1071 270 6 0 1 270 1 270
## 1072 137 3 0 1 137 1 137
## 1073 473 6 0 1 473 1 473
## 1074 1762 40 0 1 1758 1 1762
## 1075 NA NA NA NA NA NA NA
## 1076 400 4 0 1 400 1 398
## 1253 73 22 0 1 69 54 126
## 1254 57 17 0 4 58 103 159
## 1255 251 3 0 1 251 1 251
## evalue bitscore
## 477 1.310000e-201 624
## 489 9.019000e-192 596
## 564 NA NA
## 565 NA NA
## 617 2.574082e-321 984
## 618 2.527000e-295 904
## 619 1.383000e-152 480
## 620 4.059000e-75 248
## 621 9.200000e-93 301
## 622 4.351000e-153 480
## 623 1.220000e-182 568
## 624 7.725000e-122 388
## 625 NA NA
## 626 0.000000e+00 1126
## 627 NA NA
## 628 9.676000e-06 47
## 629 NA NA
## 630 NA NA
## 631 2.014000e-218 674
## 632 0.000000e+00 1204
## 633 NA NA
## 634 NA NA
## 635 NA NA
## 636 NA NA
## 637 NA NA
## 638 1.353000e-100 349
## 639 1.973000e-22 94
## 640 NA NA
## 641 NA NA
## 642 NA NA
## 643 6.465000e-67 244
## 644 8.672000e-39 151
## 645 NA NA
## 646 0.000000e+00 1401
## 647 NA NA
## 648 2.505000e-145 459
## 721 6.913000e-109 348
## 722 4.516000e-62 209
## 723 1.145000e-24 100
## 724 1.210000e-303 927
## 725 1.297000e-223 689
## 726 0.000000e+00 1390
## 727 3.561000e-157 494
## 728 1.155000e-196 608
## 729 1.959000e-125 397
## 730 9.427000e-151 473
## 731 6.826000e-58 198
## 741 9.498000e-269 824
## 742 1.090000e-191 595
## 743 NA NA
## 744 NA NA
## 745 NA NA
## 746 1.448000e-217 672
## 747 NA NA
## 748 0.000000e+00 1448
## 749 1.724000e-157 493
## 816 0.000000e+00 1535
## 817 2.571000e-182 567
## 818 NA NA
## 819 1.455000e-21 96
## 854 4.731000e-270 826
## 855 NA NA
## 856 2.112000e-190 591
## 857 5.552000e-119 378
## 858 2.016000e-175 548
## 893 NA NA
## 894 NA NA
## 895 3.993000e-207 642
## 896 NA NA
## 897 0.000000e+00 3397
## 898 1.080000e-138 437
## 899 NA NA
## 900 7.696000e-244 749
## 901 NA NA
## 902 0.000000e+00 2224
## 903 1.898000e-225 694
## 904 NA NA
## 905 7.797000e-40 159
## 906 0.000000e+00 1211
## 1054 5.965000e-32 134
## 1055 NA NA
## 1056 NA NA
## 1057 0.000000e+00 1435
## 1058 6.033000e-162 510
## 1059 7.103000e-197 611
## 1060 5.392000e-209 646
## 1061 1.043000e-108 347
## 1062 NA NA
## 1063 1.570000e-210 651
## 1064 2.352000e-95 308
## 1065 2.531000e-58 206
## 1066 NA NA
## 1067 NA NA
## 1068 NA NA
## 1069 3.813000e-207 640
## 1070 NA NA
## 1071 4.182000e-178 554
## 1072 3.896000e-77 255
## 1073 1.345000e-309 945
## 1074 0.000000e+00 3413
## 1075 NA NA
## 1076 9.075000e-260 797
## 1253 4.144000e-17 79
## 1254 8.819000e-15 71
## 1255 2.666000e-153 481
## annotation
## 477 putative protein 21 [Drosophila-associated filamentous virus]
## 489 putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 564 <NA>
## 565 <NA>
## 617 putative protein 20 [Drosophila-associated filamentous virus]
## 618 putative protein 21 [Drosophila-associated filamentous virus]
## 619 putative protein 22 [Drosophila-associated filamentous virus]
## 620 putative protein 18 [Drosophila-associated filamentous virus]
## 621 putative protein 17 [Drosophila-associated filamentous virus]
## 622 putative ORF19 [Drosophila-associated filamentous virus]
## 623 putative protein 15 [Drosophila-associated filamentous virus]
## 624 putative protein 13 [Drosophila-associated filamentous virus]
## 625 <NA>
## 626 putative JmJC domain protein [Drosophila-associated filamentous virus]
## 627 <NA>
## 628 hypothetical protein LbFV_ORF10 [Leptopilina boulardi filamentous virus]
## 629 <NA>
## 630 <NA>
## 631 putative protein 11 [Drosophila-associated filamentous virus]
## 632 putative ORF107 [Drosophila-associated filamentous virus]
## 633 <NA>
## 634 <NA>
## 635 <NA>
## 636 <NA>
## 637 <NA>
## 638 hypothetical protein LbFV_ORF68 [Leptopilina boulardi filamentous virus]
## 639 putative protein 53 [Drosophila-associated filamentous virus]
## 640 <NA>
## 641 <NA>
## 642 <NA>
## 643 putative ATPase [Leptopilina boulardi filamentous virus]
## 644 hypothetical protein LbFV_ORF67 [Leptopilina boulardi filamentous virus]
## 645 <NA>
## 646 putative protein 65 [Drosophila-associated filamentous virus]
## 647 <NA>
## 648 putative protein 19 [Drosophila-associated filamentous virus]
## 721 putative protein 61 [Drosophila-associated filamentous virus]
## 722 putative protein 47 [Drosophila-associated filamentous virus]
## 723 putative protein 45 [Drosophila-associated filamentous virus]
## 724 putative protein 48 [Drosophila-associated filamentous virus]
## 725 putative ORF43 [Drosophila-associated filamentous virus]
## 726 putative ORF96, partial [Drosophila-associated filamentous virus]
## 727 putative protein 56 [Drosophila-associated filamentous virus]
## 728 putative protein 57 [Drosophila-associated filamentous virus]
## 729 putative protein 58 [Drosophila-associated filamentous virus]
## 730 putative Ac81-like protein [Drosophila-associated filamentous virus]
## 731 putative protein 60 [Drosophila-associated filamentous virus]
## 741 putative lecithine cholesterol acyltransferase [Drosophila-associated filamentous virus]
## 742 putative protein 51 [Drosophila-associated filamentous virus]
## 743 <NA>
## 744 <NA>
## 745 <NA>
## 746 putative protein 54 [Drosophila-associated filamentous virus]
## 747 <NA>
## 748 PIF1-like protein [Drosophila-associated filamentous virus]
## 749 putative protein 55 [Drosophila-associated filamentous virus]
## 816 P74-like protein [Drosophila-associated filamentous virus]
## 817 putative protein 1 [Drosophila-associated filamentous virus]
## 818 <NA>
## 819 lysozyme X [Drosophila simulans]
## 854 putative ORF20 [Drosophila-associated filamentous virus]
## 855 <NA>
## 856 putative protein 42 [Drosophila-associated filamentous virus]
## 857 putative protein 44 [Drosophila-associated filamentous virus]
## 858 putative protein 45 [Drosophila-associated filamentous virus]
## 893 <NA>
## 894 <NA>
## 895 putative protein 25 [Drosophila-associated filamentous virus]
## 896 <NA>
## 897 PIF2-like protein [Drosophila-associated filamentous virus]
## 898 putative protein 67 [Drosophila-associated filamentous virus]
## 899 <NA>
## 900 putative protein 29 [Drosophila-associated filamentous virus]
## 901 <NA>
## 902 putative DNA PolB, partial [Drosophila-associated filamentous virus]
## 903 putative protein 69 [Drosophila-associated filamentous virus]
## 904 <NA>
## 905 hypothetical protein LbFV_ORF52 [Leptopilina boulardi filamentous virus]
## 906 putative protein 27 [Drosophila-associated filamentous virus]
## 1054 hypothetical protein LbFV_ORF102 [Leptopilina boulardi filamentous virus]
## 1055 <NA>
## 1056 <NA>
## 1057 putative ORF5 [Drosophila-associated filamentous virus]
## 1058 putative protein 38 [Drosophila-associated filamentous virus]
## 1059 putative protein 37 [Drosophila-associated filamentous virus]
## 1060 putative protein 30 [Drosophila-associated filamentous virus]
## 1061 putative protein 36 [Drosophila-associated filamentous virus]
## 1062 <NA>
## 1063 putative ORF24 [Drosophila-associated filamentous virus]
## 1064 putative protein 33 [Drosophila-associated filamentous virus]
## 1065 putative protein 34 [Drosophila-associated filamentous virus]
## 1066 <NA>
## 1067 <NA>
## 1068 <NA>
## 1069 putative ORF1 [Drosophila-associated filamentous virus]
## 1070 <NA>
## 1071 putative nudix domain protein [Drosophila-associated filamentous virus]
## 1072 putative protein 5 [Drosophila-associated filamentous virus]
## 1073 putative protein 4 [Drosophila-associated filamentous virus]
## 1074 putative protein 7 [Drosophila-associated filamentous virus]
## 1075 <NA>
## 1076 putative ORF2 [Drosophila-associated filamentous virus]
## 1253 ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1254 ODV-E66-like protein [Drosophila-associated filamentous virus]
## 1255 ODV-E66-like protein [Drosophila-associated filamentous virus]
D.mel
# Contig identifiers for the "Vesantovirus_D.mel" set, built as
# "contig_<number>".
contig_set <- paste0(
  "contig_",
  c(2753, 20176, 3903, 4179, 22788, 15495, 3119, 2457, 22865, 11850)
)
# Second identifier set, passed separately as `contig_set_unassigned`.
contig_set_unassigned <- paste0("contig_", 8677)

# Record both sets in `virus_list` under this virus name; they are stored for
# a later fusion of the corresponding lines.
virus_list$"Vesantovirus_D.mel" <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run the ORF plotting helper on both sets against the `gff_wga2` annotation.
# NOTE(review): `plot_orfs` is defined elsewhere in this file; judging from the
# echoed output it prints the ORF table for the selected contigs -- confirm.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wga2,
  name = "Vesantovirus_D.mel"
)
## orf_name seqid source type start end score
## 106 contig_11850_1167_1355_+ contig_11850 getorf_JV gene 1167 1355 .
## 107 contig_11850_180_350_+ contig_11850 getorf_JV gene 180 350 .
## 108 contig_11850_482_676_+ contig_11850 getorf_JV gene 482 676 .
## 266 contig_15495_272_454_+ contig_15495 getorf_JV gene 272 454 .
## 267 contig_15495_953_1141_+ contig_15495 getorf_JV gene 953 1141 .
## 503 contig_20176_336_1028_- contig_20176 getorf_JV gene 336 1028 .
## 750 contig_22788_1197_4418_+ contig_22788 getorf_JV gene 1197 4418 .
## 751 contig_22788_188_937_+ contig_22788 getorf_JV gene 188 937 .
## 752 contig_22788_3_158_- contig_22788 getorf_JV gene 3 158 .
## 753 contig_22788_4495_4677_+ contig_22788 getorf_JV gene 4495 4677 .
## 754 contig_22788_943_1113_+ contig_22788 getorf_JV gene 943 1113 .
## 755 contig_22865_2320_2808_- contig_22865 getorf_JV gene 2320 2808 .
## 756 contig_22865_251_919_+ contig_22865 getorf_JV gene 251 919 .
## 757 contig_22865_2887_3303_+ contig_22865 getorf_JV gene 2887 3303 .
## 758 contig_22865_3311_3895_- contig_22865 getorf_JV gene 3311 3895 .
## 759 contig_22865_3941_4294_+ contig_22865 getorf_JV gene 3941 4294 .
## 760 contig_22865_895_2271_+ contig_22865 getorf_JV gene 895 2271 .
## 787 contig_2457_1_183_- contig_2457 getorf_JV gene 1 183 .
## 788 contig_2457_1444_4692_- contig_2457 getorf_JV gene 1444 4692 .
## 789 contig_2457_173_1441_+ contig_2457 getorf_JV gene 173 1441 .
## 790 contig_2457_4691_4843_+ contig_2457 getorf_JV gene 4691 4843 .
## 820 contig_2753_285_3890_- contig_2753 getorf_JV gene 285 3890 .
## 850 contig_3119_1409_2581_- contig_3119 getorf_JV gene 1409 2581 .
## 851 contig_3119_2591_2938_- contig_3119 getorf_JV gene 2591 2938 .
## 852 contig_3119_3119_3409_- contig_3119 getorf_JV gene 3119 3409 .
## 853 contig_3119_94_930_- contig_3119 getorf_JV gene 94 930 .
## 907 contig_3903_18_1709_+ contig_3903 getorf_JV gene 18 1709 .
## 908 contig_3903_1876_2634_+ contig_3903 getorf_JV gene 1876 2634 .
## 909 contig_3903_2931_3113_+ contig_3903 getorf_JV gene 2931 3113 .
## 910 contig_3903_3151_3300_- contig_3903 getorf_JV gene 3151 3300 .
## 911 contig_3903_3167_3316_+ contig_3903 getorf_JV gene 3167 3316 .
## 947 contig_4179_2109_2885_+ contig_4179 getorf_JV gene 2109 2885 .
## 948 contig_4179_278_1879_- contig_4179 getorf_JV gene 278 1879 .
## 1187 contig_8677_1150_1299_- contig_8677 getorf_JV gene 1150 1299 .
## 1188 contig_8677_116_295_- contig_8677 getorf_JV gene 116 295 .
## 1189 contig_8677_1277_1780_+ contig_8677 getorf_JV gene 1277 1780 .
## 1190 contig_8677_1352_1780_+ contig_8677 getorf_JV gene 1352 1780 .
## 1191 contig_8677_334_621_- contig_8677 getorf_JV gene 334 621 .
## 1192 contig_8677_334_891_- contig_8677 getorf_JV gene 334 891 .
## 1193 contig_8677_620_781_+ contig_8677 getorf_JV gene 620 781 .
## 1194 contig_8677_884_1048_+ contig_8677 getorf_JV gene 884 1048 .
## 1195 contig_8677_9_356_- contig_8677 getorf_JV gene 9 356 .
## 1196 contig_8677_963_1184_+ contig_8677 getorf_JV gene 963 1184 .
## strand phase attributes seq_length subject_id identity
## 106 TRUE 1 189 1484 <NA> NA
## 107 TRUE 1 171 1484 <NA> NA
## 108 TRUE 1 195 1484 QKT21494.1 0.984
## 266 TRUE 1 183 1249 QKT21493.1 1.000
## 267 TRUE 1 189 1249 <NA> NA
## 503 FALSE 1 693 1063 QKT21491.1 0.987
## 750 TRUE 1 3222 4745 QKT21485.1 0.991
## 751 TRUE 1 750 4745 WP_007549166.1 0.376
## 752 FALSE 1 156 4745 <NA> NA
## 753 TRUE 1 183 4745 <NA> NA
## 754 TRUE 1 171 4745 <NA> NA
## 755 FALSE 1 489 4294 QKT21502.1 1.000
## 756 TRUE 1 669 4294 QKT21500.1 1.000
## 757 TRUE 1 417 4294 <NA> NA
## 758 FALSE 1 585 4294 QKT21499.1 1.000
## 759 TRUE 1 354 4294 QKT21477.1 0.674
## 760 TRUE 1 1377 4294 QKT21501.1 0.997
## 787 FALSE 1 183 4844 <NA> NA
## 788 FALSE 1 3249 4844 AQN78642.1 0.988
## 789 TRUE 1 1269 4844 QKT21497.1 1.000
## 790 TRUE 1 153 4844 <NA> NA
## 820 FALSE 1 3606 4419 QKT21484.1 0.981
## 850 FALSE 1 1173 3702 QKT21506.1 1.000
## 851 FALSE 1 348 3702 <NA> NA
## 852 FALSE 1 291 3702 <NA> NA
## 853 FALSE 1 837 3702 QKT21522.1 1.000
## 907 TRUE 1 1692 3316 QKT21488.1 0.936
## 908 TRUE 1 759 3316 QKT21525.1 1.000
## 909 TRUE 1 183 3316 <NA> NA
## 910 FALSE 1 150 3316 QKT21524.1 0.880
## 911 TRUE 1 150 3316 <NA> NA
## 947 TRUE 1 777 3153 QKT21494.1 1.000
## 948 FALSE 1 1602 3153 QKT21493.1 1.000
## 1187 FALSE 2 150 1830 <NA> NA
## 1188 FALSE 1 180 1830 <NA> NA
## 1189 TRUE 2 504 1830 <NA> NA
## 1190 TRUE 1 429 1830 <NA> NA
## 1191 FALSE 1 288 1830 <NA> NA
## 1192 FALSE 2 558 1830 <NA> NA
## 1193 TRUE 1 162 1830 <NA> NA
## 1194 TRUE 2 165 1830 <NA> NA
## 1195 FALSE 2 348 1830 <NA> NA
## 1196 TRUE 1 222 1830 <NA> NA
## alignment_length mismatches gap_opens qstart qend sstart send evalue
## 106 NA NA NA NA NA NA NA NA
## 107 NA NA NA NA NA NA NA NA
## 108 65 1 0 1 65 195 259 4.251e-36
## 266 61 0 0 1 61 474 534 3.467e-36
## 267 NA NA NA NA NA NA NA NA
## 503 231 3 0 1 231 1 231 4.847e-146
## 750 1074 10 0 1 1074 1 1074 0.000e+00
## 751 93 56 0 1 91 167 259 5.418e-09
## 752 NA NA NA NA NA NA NA NA
## 753 NA NA NA NA NA NA NA NA
## 754 NA NA NA NA NA NA NA NA
## 755 163 0 0 1 163 1 163 4.686e-103
## 756 223 0 0 1 223 1 223 1.391e-117
## 757 NA NA NA NA NA NA NA NA
## 758 195 0 0 1 195 1 195 1.131e-112
## 759 79 24 0 40 118 139 214 5.457e-21
## 760 459 1 0 1 459 1 459 2.508e-293
## 787 NA NA NA NA NA NA NA NA
## 788 1083 13 0 1 1083 1 1083 0.000e+00
## 789 423 0 0 1 423 1 423 4.661e-281
## 790 NA NA NA NA NA NA NA NA
## 820 1202 23 0 1 1202 1 1202 0.000e+00
## 850 391 0 0 1 391 1 391 2.261e-263
## 851 NA NA NA NA NA NA NA NA
## 852 NA NA NA NA NA NA NA NA
## 853 279 0 0 1 279 1 279 4.857e-185
## 907 564 36 0 1 564 1 564 0.000e+00
## 908 253 0 0 1 253 1 253 9.866e-169
## 909 NA NA NA NA NA NA NA NA
## 910 50 6 0 1 49 1 50 1.081e-17
## 911 NA NA NA NA NA NA NA NA
## 947 259 0 0 1 259 1 259 3.279e-171
## 948 534 0 0 1 534 1 534 0.000e+00
## 1187 NA NA NA NA NA NA NA NA
## 1188 NA NA NA NA NA NA NA NA
## 1189 NA NA NA NA NA NA NA NA
## 1190 NA NA NA NA NA NA NA NA
## 1191 NA NA NA NA NA NA NA NA
## 1192 NA NA NA NA NA NA NA NA
## 1193 NA NA NA NA NA NA NA NA
## 1194 NA NA NA NA NA NA NA NA
## 1195 NA NA NA NA NA NA NA NA
## 1196 NA NA NA NA NA NA NA NA
## bitscore
## 106 NA
## 107 NA
## 108 132
## 266 132
## 267 NA
## 503 459
## 750 2120
## 751 62
## 752 NA
## 753 NA
## 754 NA
## 755 331
## 756 376
## 757 NA
## 758 361
## 759 92
## 760 897
## 787 NA
## 788 2209
## 789 859
## 790 NA
## 820 2463
## 850 807
## 851 NA
## 852 NA
## 853 574
## 907 1088
## 908 526
## 909 NA
## 910 79
## 911 NA
## 947 533
## 948 1090
## 1187 NA
## 1188 NA
## 1189 NA
## 1190 NA
## 1191 NA
## 1192 NA
## 1193 NA
## 1194 NA
## 1195 NA
## 1196 NA
## annotation
## 106 <NA>
## 107 <NA>
## 108 putative glycoprotein [Vesanto virus]
## 266 putative DUF3472 protein [Vesanto virus]
## 267 <NA>
## 503 hypothetical protein 1 [Vesanto virus]
## 750 putative structural protein [Vesanto virus]
## 751 hypothetical protein [Wolbachia endosymbiont of Drosophila ananassae]
## 752 <NA>
## 753 <NA>
## 754 <NA>
## 755 putative coat protein [Vesanto virus]
## 756 hypothetical protein 3 [Vesanto virus]
## 757 <NA>
## 758 hypothetical protein 2 [Vesanto virus]
## 759 hypothetical protein [Vesanto virus]
## 760 putative NS1 protein [Vesanto virus]
## 787 <NA>
## 788 putative structural protein [Vesanto virus]
## 789 putative capsid protein [Vesanto virus]
## 790 <NA>
## 820 putative DNA polymerase B [Vesanto virus]
## 850 putative nuclease domain protein [Vesanto virus]
## 851 <NA>
## 852 <NA>
## 853 putative glycoprotein [Vesanto virus]
## 907 putative DUF3472 protein [Vesanto virus]
## 908 putative glycoprotein [Vesanto virus]
## 909 <NA>
## 910 putative DUF3472 protein [Vesanto virus]
## 911 <NA>
## 947 putative glycoprotein [Vesanto virus]
## 948 putative DUF3472 protein [Vesanto virus]
## 1187 <NA>
## 1188 <NA>
## 1189 <NA>
## 1190 <NA>
## 1191 <NA>
## 1192 <NA>
## 1193 <NA>
## 1194 <NA>
## 1195 <NA>
## 1196 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score
## 106 contig_11850_1167_1355_+ contig_11850 getorf_JV gene 1167 1355 .
## 107 contig_11850_180_350_+ contig_11850 getorf_JV gene 180 350 .
## 108 contig_11850_482_676_+ contig_11850 getorf_JV gene 482 676 .
## 266 contig_15495_272_454_+ contig_15495 getorf_JV gene 272 454 .
## 267 contig_15495_953_1141_+ contig_15495 getorf_JV gene 953 1141 .
## 503 contig_20176_336_1028_- contig_20176 getorf_JV gene 336 1028 .
## 750 contig_22788_1197_4418_+ contig_22788 getorf_JV gene 1197 4418 .
## 751 contig_22788_188_937_+ contig_22788 getorf_JV gene 188 937 .
## 752 contig_22788_3_158_- contig_22788 getorf_JV gene 3 158 .
## 753 contig_22788_4495_4677_+ contig_22788 getorf_JV gene 4495 4677 .
## 754 contig_22788_943_1113_+ contig_22788 getorf_JV gene 943 1113 .
## 755 contig_22865_2320_2808_- contig_22865 getorf_JV gene 2320 2808 .
## 756 contig_22865_251_919_+ contig_22865 getorf_JV gene 251 919 .
## 757 contig_22865_2887_3303_+ contig_22865 getorf_JV gene 2887 3303 .
## 758 contig_22865_3311_3895_- contig_22865 getorf_JV gene 3311 3895 .
## 759 contig_22865_3941_4294_+ contig_22865 getorf_JV gene 3941 4294 .
## 760 contig_22865_895_2271_+ contig_22865 getorf_JV gene 895 2271 .
## 787 contig_2457_1_183_- contig_2457 getorf_JV gene 1 183 .
## 788 contig_2457_1444_4692_- contig_2457 getorf_JV gene 1444 4692 .
## 789 contig_2457_173_1441_+ contig_2457 getorf_JV gene 173 1441 .
## 790 contig_2457_4691_4843_+ contig_2457 getorf_JV gene 4691 4843 .
## 820 contig_2753_285_3890_- contig_2753 getorf_JV gene 285 3890 .
## 850 contig_3119_1409_2581_- contig_3119 getorf_JV gene 1409 2581 .
## 851 contig_3119_2591_2938_- contig_3119 getorf_JV gene 2591 2938 .
## 852 contig_3119_3119_3409_- contig_3119 getorf_JV gene 3119 3409 .
## 853 contig_3119_94_930_- contig_3119 getorf_JV gene 94 930 .
## 907 contig_3903_18_1709_+ contig_3903 getorf_JV gene 18 1709 .
## 908 contig_3903_1876_2634_+ contig_3903 getorf_JV gene 1876 2634 .
## 909 contig_3903_2931_3113_+ contig_3903 getorf_JV gene 2931 3113 .
## 910 contig_3903_3151_3300_- contig_3903 getorf_JV gene 3151 3300 .
## 911 contig_3903_3167_3316_+ contig_3903 getorf_JV gene 3167 3316 .
## 947 contig_4179_2109_2885_+ contig_4179 getorf_JV gene 2109 2885 .
## 948 contig_4179_278_1879_- contig_4179 getorf_JV gene 278 1879 .
## 1187 contig_8677_1150_1299_- contig_8677 getorf_JV gene 1150 1299 .
## 1188 contig_8677_116_295_- contig_8677 getorf_JV gene 116 295 .
## 1189 contig_8677_1277_1780_+ contig_8677 getorf_JV gene 1277 1780 .
## 1190 contig_8677_1352_1780_+ contig_8677 getorf_JV gene 1352 1780 .
## 1191 contig_8677_334_621_- contig_8677 getorf_JV gene 334 621 .
## 1192 contig_8677_334_891_- contig_8677 getorf_JV gene 334 891 .
## 1193 contig_8677_620_781_+ contig_8677 getorf_JV gene 620 781 .
## 1194 contig_8677_884_1048_+ contig_8677 getorf_JV gene 884 1048 .
## 1195 contig_8677_9_356_- contig_8677 getorf_JV gene 9 356 .
## 1196 contig_8677_963_1184_+ contig_8677 getorf_JV gene 963 1184 .
## strand phase attributes seq_length subject_id identity
## 106 TRUE 1 189 1484 <NA> NA
## 107 TRUE 1 171 1484 <NA> NA
## 108 TRUE 1 195 1484 QKT21494.1 0.984
## 266 TRUE 1 183 1249 QKT21493.1 1.000
## 267 TRUE 1 189 1249 <NA> NA
## 503 FALSE 1 693 1063 QKT21491.1 0.987
## 750 TRUE 1 3222 4745 QKT21485.1 0.991
## 751 TRUE 1 750 4745 WP_007549166.1 0.376
## 752 FALSE 1 156 4745 <NA> NA
## 753 TRUE 1 183 4745 <NA> NA
## 754 TRUE 1 171 4745 <NA> NA
## 755 FALSE 1 489 4294 QKT21502.1 1.000
## 756 TRUE 1 669 4294 QKT21500.1 1.000
## 757 TRUE 1 417 4294 <NA> NA
## 758 FALSE 1 585 4294 QKT21499.1 1.000
## 759 TRUE 1 354 4294 QKT21477.1 0.674
## 760 TRUE 1 1377 4294 QKT21501.1 0.997
## 787 FALSE 1 183 4844 <NA> NA
## 788 FALSE 1 3249 4844 AQN78642.1 0.988
## 789 TRUE 1 1269 4844 QKT21497.1 1.000
## 790 TRUE 1 153 4844 <NA> NA
## 820 FALSE 1 3606 4419 QKT21484.1 0.981
## 850 FALSE 1 1173 3702 QKT21506.1 1.000
## 851 FALSE 1 348 3702 <NA> NA
## 852 FALSE 1 291 3702 <NA> NA
## 853 FALSE 1 837 3702 QKT21522.1 1.000
## 907 TRUE 1 1692 3316 QKT21488.1 0.936
## 908 TRUE 1 759 3316 QKT21525.1 1.000
## 909 TRUE 1 183 3316 <NA> NA
## 910 FALSE 1 150 3316 QKT21524.1 0.880
## 911 TRUE 1 150 3316 <NA> NA
## 947 TRUE 1 777 3153 QKT21494.1 1.000
## 948 FALSE 1 1602 3153 QKT21493.1 1.000
## 1187 FALSE 2 150 1830 <NA> NA
## 1188 FALSE 1 180 1830 <NA> NA
## 1189 TRUE 2 504 1830 <NA> NA
## 1190 TRUE 1 429 1830 <NA> NA
## 1191 FALSE 1 288 1830 <NA> NA
## 1192 FALSE 2 558 1830 <NA> NA
## 1193 TRUE 1 162 1830 <NA> NA
## 1194 TRUE 2 165 1830 <NA> NA
## 1195 FALSE 2 348 1830 <NA> NA
## 1196 TRUE 1 222 1830 <NA> NA
## alignment_length mismatches gap_opens qstart qend sstart send evalue
## 106 NA NA NA NA NA NA NA NA
## 107 NA NA NA NA NA NA NA NA
## 108 65 1 0 1 65 195 259 4.251e-36
## 266 61 0 0 1 61 474 534 3.467e-36
## 267 NA NA NA NA NA NA NA NA
## 503 231 3 0 1 231 1 231 4.847e-146
## 750 1074 10 0 1 1074 1 1074 0.000e+00
## 751 93 56 0 1 91 167 259 5.418e-09
## 752 NA NA NA NA NA NA NA NA
## 753 NA NA NA NA NA NA NA NA
## 754 NA NA NA NA NA NA NA NA
## 755 163 0 0 1 163 1 163 4.686e-103
## 756 223 0 0 1 223 1 223 1.391e-117
## 757 NA NA NA NA NA NA NA NA
## 758 195 0 0 1 195 1 195 1.131e-112
## 759 79 24 0 40 118 139 214 5.457e-21
## 760 459 1 0 1 459 1 459 2.508e-293
## 787 NA NA NA NA NA NA NA NA
## 788 1083 13 0 1 1083 1 1083 0.000e+00
## 789 423 0 0 1 423 1 423 4.661e-281
## 790 NA NA NA NA NA NA NA NA
## 820 1202 23 0 1 1202 1 1202 0.000e+00
## 850 391 0 0 1 391 1 391 2.261e-263
## 851 NA NA NA NA NA NA NA NA
## 852 NA NA NA NA NA NA NA NA
## 853 279 0 0 1 279 1 279 4.857e-185
## 907 564 36 0 1 564 1 564 0.000e+00
## 908 253 0 0 1 253 1 253 9.866e-169
## 909 NA NA NA NA NA NA NA NA
## 910 50 6 0 1 49 1 50 1.081e-17
## 911 NA NA NA NA NA NA NA NA
## 947 259 0 0 1 259 1 259 3.279e-171
## 948 534 0 0 1 534 1 534 0.000e+00
## 1187 NA NA NA NA NA NA NA NA
## 1188 NA NA NA NA NA NA NA NA
## 1189 NA NA NA NA NA NA NA NA
## 1190 NA NA NA NA NA NA NA NA
## 1191 NA NA NA NA NA NA NA NA
## 1192 NA NA NA NA NA NA NA NA
## 1193 NA NA NA NA NA NA NA NA
## 1194 NA NA NA NA NA NA NA NA
## 1195 NA NA NA NA NA NA NA NA
## 1196 NA NA NA NA NA NA NA NA
## bitscore
## 106 NA
## 107 NA
## 108 132
## 266 132
## 267 NA
## 503 459
## 750 2120
## 751 62
## 752 NA
## 753 NA
## 754 NA
## 755 331
## 756 376
## 757 NA
## 758 361
## 759 92
## 760 897
## 787 NA
## 788 2209
## 789 859
## 790 NA
## 820 2463
## 850 807
## 851 NA
## 852 NA
## 853 574
## 907 1088
## 908 526
## 909 NA
## 910 79
## 911 NA
## 947 533
## 948 1090
## 1187 NA
## 1188 NA
## 1189 NA
## 1190 NA
## 1191 NA
## 1192 NA
## 1193 NA
## 1194 NA
## 1195 NA
## 1196 NA
## annotation
## 106 <NA>
## 107 <NA>
## 108 putative glycoprotein [Vesanto virus]
## 266 putative DUF3472 protein [Vesanto virus]
## 267 <NA>
## 503 hypothetical protein 1 [Vesanto virus]
## 750 putative structural protein [Vesanto virus]
## 751 hypothetical protein [Wolbachia endosymbiont of Drosophila ananassae]
## 752 <NA>
## 753 <NA>
## 754 <NA>
## 755 putative coat protein [Vesanto virus]
## 756 hypothetical protein 3 [Vesanto virus]
## 757 <NA>
## 758 hypothetical protein 2 [Vesanto virus]
## 759 hypothetical protein [Vesanto virus]
## 760 putative NS1 protein [Vesanto virus]
## 787 <NA>
## 788 putative structural protein [Vesanto virus]
## 789 putative capsid protein [Vesanto virus]
## 790 <NA>
## 820 putative DNA polymerase B [Vesanto virus]
## 850 putative nuclease domain protein [Vesanto virus]
## 851 <NA>
## 852 <NA>
## 853 putative glycoprotein [Vesanto virus]
## 907 putative DUF3472 protein [Vesanto virus]
## 908 putative glycoprotein [Vesanto virus]
## 909 <NA>
## 910 putative DUF3472 protein [Vesanto virus]
## 911 <NA>
## 947 putative glycoprotein [Vesanto virus]
## 948 putative DUF3472 protein [Vesanto virus]
## 1187 <NA>
## 1188 <NA>
## 1189 <NA>
## 1190 <NA>
## 1191 <NA>
## 1192 <NA>
## 1193 <NA>
## 1194 <NA>
## 1195 <NA>
## 1196 <NA>
import all wta contigs and their gff
# Load the WTA contigs (the FASTA that also holds the unassigned ones) as a
# named sequence set and preview the first records.
wta_contigs_fasta <- "../sequences/wta_final_contigs_with_unassigned.fa"
contigs_wta <- readBStringSet(wta_contigs_fasta)
head(contigs_wta)
## BStringSet object of length 6:
## width seq names
## [1] 1662 CGAGATACATCGGTGACTGGAGG...ACACTCAATCACACAAAAGAAAT contig_10017
## [2] 1659 CAATAAAGATAAGAATGCAAACA...TTTGTTTCAATATAATTTTTGAA contig_10041
## [3] 1651 AATATTTGTGCCAAAAGAGCATC...TATACAATAAGACTTACCATACT contig_10108
## [4] 1621 CAAGAGTATCGATCTAAAATTAA...GAATAAAGCTGGTTTAAAGTCCC contig_10399
## [5] 1613 AGGAGAAGGAGAGACTAAAAAGC...TCGAAGCGTAAGGGAAAAGGAGC contig_10471
## [6] 1587 GAATAAATTATGGTGTAGTAAGA...AAAGCATATCTAAAGTACGCTAT contig_10707
# Read the getorf predictions (9-column GFF tables): one file for the assigned
# WTA contigs and one file per getorf option for the unassigned contigs.
gff_columns <- c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes")
gff_wta <- read.table("../sequences/wta_final_contigs.gff")
# add unassigned contigs
gff_wta_unassigned0 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option0.gff")
gff_wta_unassigned1 <- read.table("../sequences/final_contigs_unassigned_wgta_prediction_option1.gff")
names(gff_wta) <- gff_columns
names(gff_wta_unassigned0) <- gff_columns
names(gff_wta_unassigned1) <- gff_columns
# we will use the phase column to indicate which getorf option has been used : 1=>1; 2=>0
gff_wta$phase <- 1
gff_wta_unassigned1$phase <- 1
gff_wta_unassigned0$phase <- 2
gff_wta <- rbind(gff_wta, gff_wta_unassigned0, gff_wta_unassigned1)
# Recode the strand as a FALSE/TRUE factor ("+" => TRUE, "-" => FALSE).
# The mapping is derived from the symbol itself: relabelling the levels of
# as.factor(strand) depends on how the current locale sorts "+" and "-", and
# silently inverts the strand in locales where "+" sorts first (e.g. "C").
stopifnot(all(gff_wta$strand %in% c("+", "-")))
gff_wta$strand <- factor(gff_wta$strand == "+", levels = c(FALSE, TRUE))
gff_wta$phase <- as.factor(gff_wta$phase)
head(gff_wta)
Add contig length
# Contig lengths as a vector named by contig id; the merge below matches the
# `seqid` column against those names (used as row names).
contig_length <- setNames(width(contigs_wta), names(contigs_wta))
gff_wta <- merge(x = gff_wta, y = contig_length, by.x = "seqid", by.y = "row.names")
# The merged-in length is the 10th column; give it an explicit name.
names(gff_wta)[10] <- "seq_length"
Add an orf id column
# Build the ORF identifier "<seqid>_<start>_<end>_<sign>", where the sign is
# "+" for strand TRUE and "-" for strand FALSE.
# The sign is computed from the strand column alone, so a sequence id that
# happens to contain "TRUE" or "FALSE" is never rewritten.
strand_sign <- ifelse(gff_wta$strand == "TRUE", "+", "-")
orf_names <- paste(gff_wta$seqid, gff_wta$start, gff_wta$end, strand_sign, sep = "_")
gff_wta$orf_name <- orf_names
import blastp results
# Column labels for the 12-column tabular blastp output.
# NOTE(review): this vector shares its name with base::names(); calls such as
# names(x) still resolve to the function, so the variable name is kept as is.
names <- c(
  "query_id", "subject_id", "identity", "alignment_length",
  "mismatches", "gap_opens", "qstart", "qend",
  "sstart", "send", "evalue", "bitscore"
)
wta_blast <- read.table("../TABLES/wta_final_contigs_getorf.blastp.tab")
colnames(wta_blast) <- names
head(wta_blast)
import subject_id sequences (with informative names)
# Protein sequences of the blast subjects; their FASTA headers carry the
# accession followed by a descriptive annotation.
subject_id_seqs <- readBStringSet(filepath = "../sequences/wta_protein_homologs.fasta")
subject_id_seqs
## BStringSet object of length 2132:
## width seq names
## [1] 874 MQCPNQNHMLVNRAMVVAALDS...SLAQKLPCGGVVIQVIHNVYV NP_041191.1 RNA d...
## [2] 1596 MAHFQQTMNTKVTEAGIGRNSL...VHKTAVNGSFAFCSIVKYLSD NP_056808.1 181 K...
## [3] 458 MQFYYDTLLPGNSTILNEYDAV...VHKTAVNGSFAFCSIVKYLSD NP_056810.1 52KDa...
## [4] 1648 MANINEQINNQRDAAASGRNNL...CLCKYLSDKRLFRSLYIDVSK NP_044577.1 186K ...
## [5] 1601 MAQFQQTIDMQTLQAAAGRNSL...AFCSIIKYLSDKRLFRDLFFV NP_046151.1 unnam...
## ... ... ...
## [2128] 468 MSRYGFNNNRGAGQQQWRNFGP...PFKPVLRVKKFCSIDVKPVSM QMI58126.1 putati...
## [2129] 292 MAAPKSKFVFDFEKLKQTFVEI...PKIPVKCAANFLGTKAGSGKI XP_036671730.1 un...
## [2130] 292 MAAPKSKFVFDFEKLKQTFVEI...PKIPVKCAANFLGTKAGSGKI XP_036671731.1 un...
## [2131] 292 MAAPKSKFVFDFEKLKQTFVEI...PKIPVKCAANFLGTKAGSGKI XP_036671732.1 un...
## [2132] 1879 MDSLLDTSFTERFMSDPIYDGE...FRDDGTRRGGIHSRLGFVLVI YP_009976137.1 RN...
# Split every FASTA header once on spaces: the first field is the accession
# (short name), the remaining fields re-joined form the annotation.
# vapply() always returns a character vector (even for an empty sequence set),
# so `df` keeps its two columns instead of collapsing when there is no input.
header_fields <- strsplit(names(subject_id_seqs), " ")
short_names <- vapply(header_fields, function(fields) fields[1], character(1))
annotation <- vapply(
  header_fields,
  function(fields) paste0(fields[-1], collapse = " "),
  character(1)
)
df <- data.frame(short_names, annotation)
Add this information to blast output
# Left join: keep every blast hit and attach the subject annotation when the
# accession is present in `df`.
wta_blast2 <- merge(
  x = wta_blast,
  y = df,
  by.x = "subject_id",
  by.y = "short_names",
  all.x = TRUE,
  all.y = FALSE
)
Combine this to the gff
# select the best hit for each query (group): smallest evalue first, ties
# broken by the highest bitscore. The tie-break matters because many e-values
# are exactly 0 and the preceding merge reorders hits by subject_id, so the
# first row of a tie would otherwise be arbitrary.
wta_blast2_besthit <- wta_blast2 %>%
  group_by(query_id) %>%
  arrange(evalue, dplyr::desc(bitscore)) %>%
  dplyr::slice(1) %>%
  ungroup()
# Left join onto the ORF table: ORFs without any hit keep NA in the blast columns.
gff_wta2 <- merge(gff_wta, wta_blast2_besthit, by.x = "orf_name", by.y = "query_id", all.x = TRUE, all.y = FALSE)
write to disk:
# Export the annotated ORF table as a tab-separated file with a header line,
# without row names or quoting, then preview it.
write.table(
  gff_wta2,
  file = "../figures/orf_predictions/gff_wta2.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)
head(gff_wta2)
# Taxonomic information for the protein homologs (semicolon-separated table
# with a header line).
wta_taxo_info <- read.table(
  "../TABLES/wta_protein_homologs.ids_taxid2.txt",
  sep = ";",
  header = TRUE
)
dim(wta_taxo_info)
## [1] 2132 24
head(wta_taxo_info)
Define family colours
# Palette of 38 colours (12 + 12 + 8 + 6) taken from four ColorBrewer sets.
family_palette <- c(
  brewer.pal(12, name = "Set3"),
  brewer.pal(12, name = "Paired"),
  brewer.pal(8, name = "Set2"),
  brewer.pal(6, name = "Dark2")
)
# Turn the family into a factor, then relabel its levels with the palette so
# that each family is represented by its colour code.
# NOTE(review): presumably there are at most 38 distinct families - confirm.
wta_taxo_info$family_colour <- as.factor(wta_taxo_info$family)
levels(wta_taxo_info$family_colour) <- family_palette
seqs_wta <- readBStringSet("../sequences/wta_final_contigs_with_unassigned.fa")
Define the corresponding contigs :
# Contigs attributed to this virus (Chaq included); no unassigned contig.
virus_name <- "Galbut_virus_D.mel"
contig_ids <- c(9575, 9476, 11373, 7659, 17024, 12188, 18451, 7863, 9319, 22044, 9859, 10399, 13219)
contig_set <- paste0("contig_", contig_ids)
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[[virus_name]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw the ORF map of these contigs using the annotated ORF table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = virus_name
)
## orf_name seqid source type start end score strand
## 18 contig_10399_1_183_- contig_10399 getorf_JV gene 1 183 . FALSE
## 19 contig_10399_275_1612_+ contig_10399 getorf_JV gene 275 1612 . TRUE
## 47 contig_11373_66_1445_- contig_11373 getorf_JV gene 66 1445 . FALSE
## 67 contig_12188_38_1372_+ contig_12188 getorf_JV gene 38 1372 . TRUE
## 107 contig_13219_258_1214_+ contig_13219 getorf_JV gene 258 1214 . TRUE
## 176 contig_17024_122_1174_+ contig_17024 getorf_JV gene 122 1174 . TRUE
## 224 contig_18451_625_1059_+ contig_18451 getorf_JV gene 625 1059 . TRUE
## 288 contig_22044_168_914_+ contig_22044 getorf_JV gene 168 914 . TRUE
## 393 contig_7659_1047_1988_+ contig_7659 getorf_JV gene 1047 1988 . TRUE
## 394 contig_7659_3_1001_- contig_7659 getorf_JV gene 3 1001 . FALSE
## 398 contig_7863_34_345_- contig_7863 getorf_JV gene 34 345 . FALSE
## 399 contig_7863_504_1829_+ contig_7863 getorf_JV gene 504 1829 . TRUE
## 445 contig_9319_139_1473_+ contig_9319 getorf_JV gene 139 1473 . TRUE
## 457 contig_9476_3_1085_- contig_9476 getorf_JV gene 3 1085 . FALSE
## 458 contig_9575_125_1594_+ contig_9575 getorf_JV gene 125 1594 . TRUE
## 472 contig_9859_476_1492_+ contig_9859 getorf_JV gene 476 1492 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 18 1 183 1621 <NA> NA NA NA
## 19 1 1338 1621 AWY11128.1 0.991 446 4
## 47 1 1380 1525 AWY11085.1 0.958 460 19
## 67 1 1335 1458 AWY11051.1 0.888 438 49
## 107 1 957 1382 AKH40308.1 0.987 319 4
## 176 1 1053 1176 AWY11143.1 0.971 351 10
## 224 1 435 1120 AWY11050.1 0.979 145 3
## 288 1 747 1012 AWY11144.1 0.991 249 2
## 393 1 942 1990 AWY11085.1 0.968 314 10
## 394 1 999 1990 AWY11095.1 0.968 258 8
## 398 1 312 1953 AWY11174.1 0.942 104 6
## 399 1 1326 1953 AWY11166.1 0.975 442 11
## 445 1 1335 1748 AWY11144.1 0.991 445 4
## 457 1 1083 1114 AWY11130.1 0.886 361 41
## 458 1 1470 1604 AWY11049.1 0.953 490 23
## 472 1 1017 1681 AWY11142.1 0.993 293 2
## gap_opens qstart qend sstart send evalue bitscore
## 18 NA NA NA NA NA NA NA
## 19 0 1 446 95 540 3.065e-311 948
## 47 0 1 460 36 494 9.548e-303 924
## 67 0 1 438 1 438 2.513e-261 804
## 107 0 1 319 1 319 3.619e-205 634
## 176 0 1 351 1 351 6.869e-232 713
## 224 0 1 145 352 496 1.216e-91 297
## 288 0 1 249 197 445 6.551e-170 529
## 393 0 1 314 36 349 3.472e-207 640
## 394 0 76 333 83 340 9.171e-169 530
## 398 0 1 104 339 442 1.466e-63 214
## 399 0 1 442 1 442 1.245e-281 862
## 445 0 1 445 1 445 1.408e-297 908
## 457 0 1 361 1 361 2.254e-206 640
## 458 0 1 490 51 540 0.000e+00 1005
## 472 0 1 293 240 532 6.874e-202 626
## annotation
## 18 <NA>
## 19 putative RNA-dependent RNA polymerase [Galbut virus]
## 47 orf1 [Galbut virus]
## 67 orf1 [Galbut virus]
## 107 orf1 [Chaq virus]
## 176 orf1 [Galbut virus]
## 224 orf1 [Galbut virus]
## 288 orf1 [Galbut virus]
## 393 orf1 [Galbut virus]
## 394 orf1 [Galbut virus]
## 398 orf1 [Galbut virus]
## 399 orf1 [Galbut virus]
## 445 orf1 [Galbut virus]
## 457 orf1 [Galbut virus]
## 458 putative RNA-dependent RNA polymerase [Galbut virus]
## 472 putative RNA-dependent RNA polymerase [Galbut virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 18 contig_10399_1_183_- contig_10399 getorf_JV gene 1 183 . FALSE
## 19 contig_10399_275_1612_+ contig_10399 getorf_JV gene 275 1612 . TRUE
## 47 contig_11373_66_1445_- contig_11373 getorf_JV gene 66 1445 . FALSE
## 67 contig_12188_38_1372_+ contig_12188 getorf_JV gene 38 1372 . TRUE
## 107 contig_13219_258_1214_+ contig_13219 getorf_JV gene 258 1214 . TRUE
## 176 contig_17024_122_1174_+ contig_17024 getorf_JV gene 122 1174 . TRUE
## 224 contig_18451_625_1059_+ contig_18451 getorf_JV gene 625 1059 . TRUE
## 288 contig_22044_168_914_+ contig_22044 getorf_JV gene 168 914 . TRUE
## 393 contig_7659_1047_1988_+ contig_7659 getorf_JV gene 1047 1988 . TRUE
## 394 contig_7659_3_1001_- contig_7659 getorf_JV gene 3 1001 . FALSE
## 398 contig_7863_34_345_- contig_7863 getorf_JV gene 34 345 . FALSE
## 399 contig_7863_504_1829_+ contig_7863 getorf_JV gene 504 1829 . TRUE
## 445 contig_9319_139_1473_+ contig_9319 getorf_JV gene 139 1473 . TRUE
## 457 contig_9476_3_1085_- contig_9476 getorf_JV gene 3 1085 . FALSE
## 458 contig_9575_125_1594_+ contig_9575 getorf_JV gene 125 1594 . TRUE
## 472 contig_9859_476_1492_+ contig_9859 getorf_JV gene 476 1492 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 18 1 183 1621 <NA> NA NA NA
## 19 1 1338 1621 AWY11128.1 0.991 446 4
## 47 1 1380 1525 AWY11085.1 0.958 460 19
## 67 1 1335 1458 AWY11051.1 0.888 438 49
## 107 1 957 1382 AKH40308.1 0.987 319 4
## 176 1 1053 1176 AWY11143.1 0.971 351 10
## 224 1 435 1120 AWY11050.1 0.979 145 3
## 288 1 747 1012 AWY11144.1 0.991 249 2
## 393 1 942 1990 AWY11085.1 0.968 314 10
## 394 1 999 1990 AWY11095.1 0.968 258 8
## 398 1 312 1953 AWY11174.1 0.942 104 6
## 399 1 1326 1953 AWY11166.1 0.975 442 11
## 445 1 1335 1748 AWY11144.1 0.991 445 4
## 457 1 1083 1114 AWY11130.1 0.886 361 41
## 458 1 1470 1604 AWY11049.1 0.953 490 23
## 472 1 1017 1681 AWY11142.1 0.993 293 2
## gap_opens qstart qend sstart send evalue bitscore
## 18 NA NA NA NA NA NA NA
## 19 0 1 446 95 540 3.065e-311 948
## 47 0 1 460 36 494 9.548e-303 924
## 67 0 1 438 1 438 2.513e-261 804
## 107 0 1 319 1 319 3.619e-205 634
## 176 0 1 351 1 351 6.869e-232 713
## 224 0 1 145 352 496 1.216e-91 297
## 288 0 1 249 197 445 6.551e-170 529
## 393 0 1 314 36 349 3.472e-207 640
## 394 0 76 333 83 340 9.171e-169 530
## 398 0 1 104 339 442 1.466e-63 214
## 399 0 1 442 1 442 1.245e-281 862
## 445 0 1 445 1 445 1.408e-297 908
## 457 0 1 361 1 361 2.254e-206 640
## 458 0 1 490 51 540 0.000e+00 1005
## 472 0 1 293 240 532 6.874e-202 626
## annotation
## 18 <NA>
## 19 putative RNA-dependent RNA polymerase [Galbut virus]
## 47 orf1 [Galbut virus]
## 67 orf1 [Galbut virus]
## 107 orf1 [Chaq virus]
## 176 orf1 [Galbut virus]
## 224 orf1 [Galbut virus]
## 288 orf1 [Galbut virus]
## 393 orf1 [Galbut virus]
## 394 orf1 [Galbut virus]
## 398 orf1 [Galbut virus]
## 399 orf1 [Galbut virus]
## 445 orf1 [Galbut virus]
## 457 orf1 [Galbut virus]
## 458 putative RNA-dependent RNA polymerase [Galbut virus]
## 472 putative RNA-dependent RNA polymerase [Galbut virus]
Define the corresponding contigs :
# Contigs attributed to this virus (Chaq included); no unassigned contig.
virus_name <- "Galbut_virus_D.sim"
contig_ids <- c(10907, 7968, 10893, 18794, 7817)
contig_set <- paste0("contig_", contig_ids)
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[[virus_name]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw the ORF map of these contigs using the annotated ORF table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = virus_name
)
## orf_name seqid source type start end score strand
## 24 contig_10893_227_1075_+ contig_10893 getorf_JV gene 227 1075 . TRUE
## 25 contig_10907_65_1546_- contig_10907 getorf_JV gene 65 1546 . FALSE
## 227 contig_18794_3_1001_- contig_18794 getorf_JV gene 3 1001 . FALSE
## 395 contig_7817_3_935_- contig_7817 getorf_JV gene 3 935 . FALSE
## 400 contig_7968_1496_1651_- contig_7968 getorf_JV gene 1496 1651 . FALSE
## 401 contig_7968_1657_1935_+ contig_7968 getorf_JV gene 1657 1935 . TRUE
## 402 contig_7968_167_1489_- contig_7968 getorf_JV gene 167 1489 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 24 1 849 1077 AWY11128.1 0.982 283 5
## 25 1 1482 1566 AWY11085.1 0.963 494 18
## 227 1 999 1108 AWY11172.1 0.975 333 8
## 395 1 933 1137 AKH40308.1 0.863 315 42
## 400 1 156 1937 <NA> NA NA NA
## 401 1 279 1937 AWY11052.1 0.956 93 4
## 402 1 1323 1937 AWY11086.1 0.805 442 86
## gap_opens qstart qend sstart send evalue bitscore
## 24 0 1 283 240 522 2.308e-195 604
## 25 0 1 494 1 494 0.000e+00 989
## 227 0 1 333 51 383 3.418e-227 699
## 395 0 1 311 1 315 4.719e-179 559
## 400 NA NA NA NA NA NA NA
## 401 0 1 93 51 143 4.166e-56 192
## 402 0 1 441 1 442 1.332e-234 726
## annotation
## 24 putative RNA-dependent RNA polymerase [Galbut virus]
## 25 orf1 [Galbut virus]
## 227 putative RNA-dependent RNA polymerase [Galbut virus]
## 395 orf1 [Chaq virus]
## 400 <NA>
## 401 putative RNA-dependent RNA polymerase [Galbut virus]
## 402 orf1 [Galbut virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 24 contig_10893_227_1075_+ contig_10893 getorf_JV gene 227 1075 . TRUE
## 25 contig_10907_65_1546_- contig_10907 getorf_JV gene 65 1546 . FALSE
## 227 contig_18794_3_1001_- contig_18794 getorf_JV gene 3 1001 . FALSE
## 395 contig_7817_3_935_- contig_7817 getorf_JV gene 3 935 . FALSE
## 400 contig_7968_1496_1651_- contig_7968 getorf_JV gene 1496 1651 . FALSE
## 401 contig_7968_1657_1935_+ contig_7968 getorf_JV gene 1657 1935 . TRUE
## 402 contig_7968_167_1489_- contig_7968 getorf_JV gene 167 1489 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 24 1 849 1077 AWY11128.1 0.982 283 5
## 25 1 1482 1566 AWY11085.1 0.963 494 18
## 227 1 999 1108 AWY11172.1 0.975 333 8
## 395 1 933 1137 AKH40308.1 0.863 315 42
## 400 1 156 1937 <NA> NA NA NA
## 401 1 279 1937 AWY11052.1 0.956 93 4
## 402 1 1323 1937 AWY11086.1 0.805 442 86
## gap_opens qstart qend sstart send evalue bitscore
## 24 0 1 283 240 522 2.308e-195 604
## 25 0 1 494 1 494 0.000e+00 989
## 227 0 1 333 51 383 3.418e-227 699
## 395 0 1 311 1 315 4.719e-179 559
## 400 NA NA NA NA NA NA NA
## 401 0 1 93 51 143 4.166e-56 192
## 402 0 1 441 1 442 1.332e-234 726
## annotation
## 24 putative RNA-dependent RNA polymerase [Galbut virus]
## 25 orf1 [Galbut virus]
## 227 putative RNA-dependent RNA polymerase [Galbut virus]
## 395 orf1 [Chaq virus]
## 400 <NA>
## 401 putative RNA-dependent RNA polymerase [Galbut virus]
## 402 orf1 [Galbut virus]
Define the corresponding contigs :
# Single contig attributed to this virus; no unassigned contig.
virus_name <- "LbTV_Lb"
contig_set <- "contig_22597"
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[[virus_name]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw the ORF map of this contig using the annotated ORF table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = virus_name
)
## orf_name seqid source type start end score
## 293 contig_22597_240_2612_- contig_22597 getorf_JV gene 240 2612 .
## 294 contig_22597_2663_7720_- contig_22597 getorf_JV gene 2663 7720 .
## strand phase attributes seq_length subject_id identity alignment_length
## 293 FALSE 1 2373 7844 YP_009072448.1 1.000 791
## 294 FALSE 1 5058 7844 YP_009072447.2 0.997 1686
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 293 0 0 1 791 1 791 0 1679
## 294 5 0 1 1686 1 1686 0 3425
## annotation
## 293 RNA-dependent RNA polymerase [Leptopilina boulardi Toti-like virus]
## 294 putative coat protein [Leptopilina boulardi Toti-like virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
Define the corresponding contigs :
# Contigs attributed to this virus; no unassigned contig.
virus_name <- "Hepe-Virga_L.b"
contig_set <- paste0("contig_", c(9042, 22560))
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[[virus_name]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw the ORF map of these contigs using the annotated ORF table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = virus_name
)
## orf_name seqid source type start end score
## 290 contig_22560_207_7349_+ contig_22560 getorf_JV gene 207 7349 .
## 291 contig_22560_7383_8246_+ contig_22560 getorf_JV gene 7383 8246 .
## 430 contig_9042_210_548_+ contig_9042 getorf_JV gene 210 548 .
## 431 contig_9042_538_1146_+ contig_9042 getorf_JV gene 538 1146 .
## strand phase attributes seq_length subject_id identity alignment_length
## 290 TRUE 1 7143 8247 AWA82269.1 0.361 2379
## 291 TRUE 1 864 8247 AWA82267.1 0.370 54
## 430 TRUE 1 339 1422 <NA> NA NA
## 431 TRUE 1 609 1422 AWA82265.1 0.512 162
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 290 1434 0 3 2381 138 2382 0.000e+00 1371
## 291 33 0 184 237 151 204 1.620e-05 51
## 430 NA NA NA NA NA NA NA NA
## 431 77 0 36 197 33 190 2.049e-46 170
## annotation
## 290 putative RNA-dependent RNA polymerase [Saiwaicho virus]
## 291 hypothetical protein [Saiwaicho virus]
## 430 <NA>
## 431 hypothetical protein [Saiwaicho virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score
## 290 contig_22560_207_7349_+ contig_22560 getorf_JV gene 207 7349 .
## 291 contig_22560_7383_8246_+ contig_22560 getorf_JV gene 7383 8246 .
## 430 contig_9042_210_548_+ contig_9042 getorf_JV gene 210 548 .
## 431 contig_9042_538_1146_+ contig_9042 getorf_JV gene 538 1146 .
## strand phase attributes seq_length subject_id identity alignment_length
## 290 TRUE 1 7143 8247 AWA82269.1 0.361 2379
## 291 TRUE 1 864 8247 AWA82267.1 0.370 54
## 430 TRUE 1 339 1422 <NA> NA NA
## 431 TRUE 1 609 1422 AWA82265.1 0.512 162
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 290 1434 0 3 2381 138 2382 0.000e+00 1371
## 291 33 0 184 237 151 204 1.620e-05 51
## 430 NA NA NA NA NA NA NA NA
## 431 77 0 36 197 33 190 2.049e-46 170
## annotation
## 290 putative RNA-dependent RNA polymerase [Saiwaicho virus]
## 291 hypothetical protein [Saiwaicho virus]
## 430 <NA>
## 431 hypothetical protein [Saiwaicho virus]
This virus was first described in Medd et al. 2018 as a Hepe-Virga virus. Consistent with their findings, the polyprotein contains the following domains:
domains
The second ORF from this contig is homologous to the hypothetical protein AWA82267.1 [Saiwaicho virus].
The largest ORF from contig_9042 shows homology to the hypothetical protein AWA82265.1 which contains a conserved domain (pfam16504 : Putative virion membrane protein of plant and insect virus).
We built a phylogeny based on the RdRp domain only.
# Draw the tree of the RdRp ORF with its homologs, then save it as a
# 10 x 8 PDF.
tree_file <- "../phylogenies/contig_22560_207_7349_+_with_homologs-PhyML_tree"
p <- plot_phylogeny(file = tree_file, taxo_info = wta_taxo_info)
ggsave(
  filename = "../phylogenies/contig_22560_207_7349_+.pdf",
  plot = p,
  width = 10,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs attributed to this virus, plus one unassigned contig.
virus_name <- "Partiti-like5_D.sub"
contig_set <- paste0("contig_", c(11634, 10471))
contig_set_unassigned <- "contig_11872"
# store for later fusion of corresponding lines
virus_list[[virus_name]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Draw the ORF map of these contigs using the annotated ORF table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = virus_name
)
## orf_name seqid source type start end score strand
## 20 contig_10471_238_1494_- contig_10471 getorf_JV gene 238 1494 . FALSE
## 60 contig_11634_73_1485_- contig_11634 getorf_JV gene 73 1485 . FALSE
## 61 contig_11872_1313_1480_- contig_11872 getorf_JV gene 1313 1480 . FALSE
## 62 contig_11872_1332_1481_+ contig_11872 getorf_JV gene 1332 1481 . TRUE
## 63 contig_11872_159_1214_- contig_11872 getorf_JV gene 159 1214 . FALSE
## 64 contig_11872_159_1226_- contig_11872 getorf_JV gene 159 1226 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 20 1 1257 1613 YP_009329883.1 0.403 393
## 60 1 1413 1503 YP_009329882.1 0.629 470
## 61 2 168 1482 <NA> NA NA
## 62 1 150 1482 <NA> NA NA
## 63 1 1056 1482 <NA> NA NA
## 64 2 1068 1482 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 20 228 0 26 418 26 408 2.010e-82 285
## 60 173 0 1 470 1 467 1.083e-209 656
## 61 NA NA NA NA NA NA NA NA
## 62 NA NA NA NA NA NA NA NA
## 63 NA NA NA NA NA NA NA NA
## 64 NA NA NA NA NA NA NA NA
## annotation
## 20 hypothetical protein [Wuhan insect virus 23]
## 60 RdRp [Wuhan insect virus 23]
## 61 <NA>
## 62 <NA>
## 63 <NA>
## 64 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 20 contig_10471_238_1494_- contig_10471 getorf_JV gene 238 1494 . FALSE
## 60 contig_11634_73_1485_- contig_11634 getorf_JV gene 73 1485 . FALSE
## 61 contig_11872_1313_1480_- contig_11872 getorf_JV gene 1313 1480 . FALSE
## 62 contig_11872_1332_1481_+ contig_11872 getorf_JV gene 1332 1481 . TRUE
## 63 contig_11872_159_1214_- contig_11872 getorf_JV gene 159 1214 . FALSE
## 64 contig_11872_159_1226_- contig_11872 getorf_JV gene 159 1226 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 20 1 1257 1613 YP_009329883.1 0.403 393
## 60 1 1413 1503 YP_009329882.1 0.629 470
## 61 2 168 1482 <NA> NA NA
## 62 1 150 1482 <NA> NA NA
## 63 1 1056 1482 <NA> NA NA
## 64 2 1068 1482 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 20 228 0 26 418 26 408 2.010e-82 285
## 60 173 0 1 470 1 467 1.083e-209 656
## 61 NA NA NA NA NA NA NA NA
## 62 NA NA NA NA NA NA NA NA
## 63 NA NA NA NA NA NA NA NA
## 64 NA NA NA NA NA NA NA NA
## annotation
## 20 hypothetical protein [Wuhan insect virus 23]
## 60 RdRp [Wuhan insect virus 23]
## 61 <NA>
## 62 <NA>
## 63 <NA>
## 64 <NA>
Wuhan insect virus 23 is composed of 2 segments (1477bp and 1381bp), very similar to what we found here.
https://www.genome.jp/virushostdb/1923727
We built a phylogeny based on the RdRp domain only.
# Draw the PhyML tree built from the RdRp ORF contig_11634_73_1485_- and its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_11634_73_1485_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Replace the x scale with explicit limits (0 to 11).
p <- p + xlim(c(0, 11))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# The ORF is on the minus strand (contig_11634_73_1485_-), so the PDF is named
# after that ORF; the previous "_+" suffix did not match the input tree file.
ggsave(
  filename = "../phylogenies/contig_11634_73_1485_-.pdf",
  plot = p,
  width = 10,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
It is unclear whether contig 11872 is part of this genome, but interestingly it shows nucleotide sequence similarity with KP757972.1 (Uncultured virus clone DmelPosVir_36 genomic sequence), obtained from a mix of D. ananassae, D. melanogaster, D. malerkotliana, and Scaptodrosophila latifasciaeformis (Webster et al. 2015, PLoS Biol.).
Define the corresponding contigs :
# Contigs attributed to this virus; NA is used when there are no unassigned contigs.
contig_set <- c("contig_2161", "contig_4815", "contig_18031")
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$`Chuviridae1_D.im` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs; the elements of `res` are printed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Chuviridae1_D.im"
)
## orf_name seqid source type start end score strand
## 217 contig_18031_1_1071_- contig_18031 getorf_JV gene 1 1071 . FALSE
## 277 contig_2161_3_5093_- contig_2161 getorf_JV gene 3 5093 . FALSE
## 337 contig_4815_21_899_+ contig_4815 getorf_JV gene 21 899 . TRUE
## 338 contig_4815_2386_2622_+ contig_4815 getorf_JV gene 2386 2622 . TRUE
## 339 contig_4815_977_2266_+ contig_4815 getorf_JV gene 977 2266 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 217 1 1071 1137 YP_009666257.1 0.325 347
## 277 1 5091 5326 YP_009337089.1 0.370 1647
## 337 1 879 2829 YP_009182178.1 0.387 186
## 338 1 237 2829 <NA> NA NA
## 339 1 1290 2829 YP_009337091.1 0.262 400
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 217 232 0 9 355 11 354 1.772e-58 212
## 277 1034 0 43 1684 201 1847 0.000e+00 1094
## 337 113 0 2 187 359 544 2.371e-38 151
## 338 NA NA NA NA NA NA NA NA
## 339 277 0 7 406 8 384 1.054e-34 144
## annotation
## 217 glycoprotein [Wuchang Cockroach Virus 3]
## 277 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]
## 337 putative glycoprotein [Imjin River virus 1]
## 338 <NA>
## 339 hypothetical protein [Hubei chuvirus-like virus 3]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 217 contig_18031_1_1071_- contig_18031 getorf_JV gene 1 1071 . FALSE
## 277 contig_2161_3_5093_- contig_2161 getorf_JV gene 3 5093 . FALSE
## 337 contig_4815_21_899_+ contig_4815 getorf_JV gene 21 899 . TRUE
## 338 contig_4815_2386_2622_+ contig_4815 getorf_JV gene 2386 2622 . TRUE
## 339 contig_4815_977_2266_+ contig_4815 getorf_JV gene 977 2266 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 217 1 1071 1137 YP_009666257.1 0.325 347
## 277 1 5091 5326 YP_009337089.1 0.370 1647
## 337 1 879 2829 YP_009182178.1 0.387 186
## 338 1 237 2829 <NA> NA NA
## 339 1 1290 2829 YP_009337091.1 0.262 400
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 217 232 0 9 355 11 354 1.772e-58 212
## 277 1034 0 43 1684 201 1847 0.000e+00 1094
## 337 113 0 2 187 359 544 2.371e-38 151
## 338 NA NA NA NA NA NA NA NA
## 339 277 0 7 406 8 384 1.054e-34 144
## annotation
## 217 glycoprotein [Wuchang Cockroach Virus 3]
## 277 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]
## 337 putative glycoprotein [Imjin River virus 1]
## 338 <NA>
## 339 hypothetical protein [Hubei chuvirus-like virus 3]
Based on RdRp (contig_2161), I built a phylogeny (first a NJ tree was built to subset the sequences used in the final ML phylogeny, using the option “select in alignment” in seaview):
# Draw the PhyML tree built from the RdRp ORF contig_2161_3_5093_- and its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_2161_3_5093_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Replace the x scale with explicit limits (0 to 9.5).
p <- p + xlim(c(0, 9.5))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the tree as PDF; no width/height is given, so ggsave uses its defaults.
ggsave(
  filename = "../phylogenies/contig_2161_-.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
It belongs to the Chuviridae family, which is composed of -ssRNA viruses (Kafer et al. 2019): “chuviruses were found to appear in linear, circular, and segmented circular forms”. Wuchang cockroach virus 3 is composed of two circular segments (7kb and 4.7kb, see fig. 4 of Li et al. eLife 2015).
Define the corresponding contigs :
# Contigs attributed to this virus; NA is used when there are no unassigned contigs.
contig_set <- c("contig_9152", "contig_8806", "contig_15880")
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$`Partiti-like1_D.sub|obs` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs; the elements of `res` are printed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Partiti-like1_D.sub|obs"
)
## orf_name seqid source type start end score strand
## 155 contig_15880_3_848_- contig_15880 getorf_JV gene 3 848 . FALSE
## 156 contig_15880_974_1216_+ contig_15880 getorf_JV gene 974 1216 . TRUE
## 424 contig_8806_130_1752_- contig_8806 getorf_JV gene 130 1752 . FALSE
## 436 contig_9152_173_1483_- contig_9152 getorf_JV gene 173 1483 . FALSE
## 437 contig_9152_43_192_+ contig_9152 getorf_JV gene 43 192 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 155 1 846 1230 AWY11087.1 0.353 245
## 156 1 243 1230 <NA> NA NA
## 424 1 1623 1812 YP_009337870.1 0.481 510
## 436 1 1311 1768 QMI58123.1 0.317 347
## 437 1 150 1768 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 155 155 0 36 275 46 290 2.152e-40 156
## 156 NA NA NA NA NA NA NA NA
## 424 262 0 30 535 16 525 5.879e-155 501
## 436 232 0 56 402 91 430 1.321e-46 180
## 437 NA NA NA NA NA NA NA NA
## annotation
## 155 hypothetical protein [Chaq virus]
## 156 <NA>
## 424 RdRp [Hubei diptera virus 17]
## 436 putative capsid protein [Vera virus]
## 437 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 155 contig_15880_3_848_- contig_15880 getorf_JV gene 3 848 . FALSE
## 156 contig_15880_974_1216_+ contig_15880 getorf_JV gene 974 1216 . TRUE
## 424 contig_8806_130_1752_- contig_8806 getorf_JV gene 130 1752 . FALSE
## 436 contig_9152_173_1483_- contig_9152 getorf_JV gene 173 1483 . FALSE
## 437 contig_9152_43_192_+ contig_9152 getorf_JV gene 43 192 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 155 1 846 1230 AWY11087.1 0.353 245
## 156 1 243 1230 <NA> NA NA
## 424 1 1623 1812 YP_009337870.1 0.481 510
## 436 1 1311 1768 QMI58123.1 0.317 347
## 437 1 150 1768 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 155 155 0 36 275 46 290 2.152e-40 156
## 156 NA NA NA NA NA NA NA NA
## 424 262 0 30 535 16 525 5.879e-155 501
## 436 232 0 56 402 91 430 1.321e-46 180
## 437 NA NA NA NA NA NA NA NA
## annotation
## 155 hypothetical protein [Chaq virus]
## 156 <NA>
## 424 RdRp [Hubei diptera virus 17]
## 436 putative capsid protein [Vera virus]
## 437 <NA>
We built a phylogeny based on RdRp :
# Draw the PhyML tree built from the RdRp ORF contig_8806_130_1752_- and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_8806_130_1752_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Replace the x scale with explicit limits (0 to 4).
p <- p + xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the tree as a 10 x 8 PDF named after the ORF.
ggsave(
  filename = "../phylogenies/contig_8806_130_1752_-.pdf",
  plot = p,
  width = 10,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Hubei diptera virus 17 is composed of two segments: 1731bp (coding the RdRp) and 1602bp. Interestingly, the contig 9152 shows similarity with protein YP_009337871.1 encoded by the second segment of Hubei diptera virus 17.
https://www.genome.jp/dbget-bin/www_bget?refseq:NC_033301 https://www.genome.jp/dbget-bin/www_bget?refseq:NC_033302
Vera virus : Two segments; Polymerase originally identified as Partitiviridae-like-2 (KP757929) in Webster et al (2015) PLoS Biology 13(7): e1002210
It is unclear whether “chaq virus” is part of this genome or not.
Define the corresponding contigs :
# Single contig attributed to this virus; NA is used when there are no unassigned contigs.
contig_set <- c("contig_1434")
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$`dsRNA_virus1_Tricho` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of this contig; the elements of `res` are printed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "dsRNA_virus1_Tricho"
)
## orf_name seqid source type start end score strand
## 128 contig_1434_190_3780_- contig_1434 getorf_JV gene 190 3780 . FALSE
## 129 contig_1434_3900_6488_- contig_1434 getorf_JV gene 3900 6488 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 128 1 3591 7315 YP_003800003.1 0.357 807
## 129 1 2589 7315 YP_003800000.1 0.240 675
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 128 509 0 158 949 266 1072 2.067e-132 460
## 129 490 0 99 773 375 1020 5.589e-34 147
## annotation
## 128 RNA-directed RNA polymerase, partial [Circulifer tenellus virus 1]
## 129 proline-alanine-rich protein [Spissistilus festinus virus 1]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 128 contig_1434_190_3780_- contig_1434 getorf_JV gene 190 3780 . FALSE
## 129 contig_1434_3900_6488_- contig_1434 getorf_JV gene 3900 6488 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 128 1 3591 7315 YP_003800003.1 0.357 807
## 129 1 2589 7315 YP_003800000.1 0.240 675
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 128 509 0 158 949 266 1072 2.067e-132 460
## 129 490 0 99 773 375 1020 5.589e-34 147
## annotation
## 128 RNA-directed RNA polymerase, partial [Circulifer tenellus virus 1]
## 129 proline-alanine-rich protein [Spissistilus festinus virus 1]
We built a phylogeny based on RdRp (on a subset of sequences retrieved from ncbi) :
# Draw the PhyML tree built from the RdRp ORF contig_1434_190_3780_- and its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_1434_190_3780_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# adjust x axis
p <- p + xlim(0, 18)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the tree as a PDF that is taller (10) than wide (8).
ggsave(
  filename = "../phylogenies/contig_1434_190_3780_-.pdf",
  plot = p,
  height = 10,
  width = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Circulifer tenellus virus 1 is known as a non-segmented dsRNA virus with an 8086bp genome (https://www.genome.jp/dbget-bin/www_bget?refseq:NC_014360).
Spissistilus festinus virus 1 is also a non segmented dsRNA virus with a 7951bp genome (https://www.genome.jp/dbget-bin/www_bget?refseq:NC_014359).
Persimmon latent virus is also a non segmented dsRNA virus with a 7475bp genome (https://www.genome.jp/dbget-bin/www_bget?refseq:NC_023983)
The contig we got is 7315bp which suggests it is complete or almost complete.
Define the corresponding contigs :
# Contigs attributed to this virus.
contig_set <- c(
  "contig_3022", "contig_2657", "contig_22938", "contig_7060",
  "contig_22971", "contig_21209", "contig_13029"
)
# Contigs kept with this virus but listed separately as unassigned.
contig_set_unassigned <- c("contig_7972", "contig_23185", "contig_13079")
# store for later fusion of corresponding lines
virus_list$`Reoviridae2_Tricho` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of both contig sets; the elements of `res` are printed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae2_Tricho"
)
## orf_name seqid source type start end score strand
## 92 contig_13029_3_1022_- contig_13029 getorf_JV gene 3 1022 . FALSE
## 93 contig_13079_1_1101_+ contig_13079 getorf_JV gene 1 1101 . TRUE
## 94 contig_13079_256_1101_+ contig_13079 getorf_JV gene 256 1101 . TRUE
## 267 contig_21209_85_1032_+ contig_21209 getorf_JV gene 85 1032 . TRUE
## 307 contig_22938_57_3602_+ contig_22938 getorf_JV gene 57 3602 . TRUE
## 308 contig_22971_24_3410_- contig_22971 getorf_JV gene 24 3410 . FALSE
## 321 contig_23185_2_1948_+ contig_23185 getorf_JV gene 2 1948 . TRUE
## 322 contig_23185_404_1948_+ contig_23185 getorf_JV gene 404 1948 . TRUE
## 323 contig_2657_102_4322_+ contig_2657 getorf_JV gene 102 4322 . TRUE
## 325 contig_3022_134_3418_- contig_3022 getorf_JV gene 134 3418 . FALSE
## 377 contig_7060_186_1949_- contig_7060 getorf_JV gene 186 1949 . FALSE
## 403 contig_7972_185_1546_+ contig_7972 getorf_JV gene 185 1546 . TRUE
## 404 contig_7972_2_1546_+ contig_7972 getorf_JV gene 2 1546 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 92 1 1020 1027 AWA82242.1 0.285 305
## 93 2 1101 1102 <NA> NA NA
## 94 1 846 1102 <NA> NA NA
## 267 1 948 1032 AWA82242.1 0.271 316
## 307 1 3546 3604 YP_392502.1 0.291 1075
## 308 1 3387 3427 YP_392503.1 0.238 1061
## 321 2 1947 2148 <NA> NA NA
## 322 1 1545 2148 <NA> NA NA
## 323 1 4221 4389 YP_392501.1 0.356 1352
## 325 1 3285 3517 AWA82240.1 0.376 458
## 377 1 1764 2107 YP_392506.1 0.247 584
## 403 1 1362 1547 <NA> NA NA
## 404 2 1545 1547 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 92 213 0 1 299 72 376 6.256e-28 121
## 93 NA NA NA NA NA NA NA NA
## 94 NA NA NA NA NA NA NA NA
## 267 225 0 5 314 121 436 1.154e-25 114
## 307 757 0 114 1182 88 1162 6.796e-132 458
## 308 775 0 11 1071 86 1103 1.682e-74 280
## 321 NA NA NA NA NA NA NA NA
## 322 NA NA NA NA NA NA NA NA
## 323 831 0 20 1371 45 1336 3.086e-222 736
## 325 280 0 634 1091 614 1062 4.031e-80 297
## 377 432 0 2 585 10 584 7.794e-36 151
## 403 NA NA NA NA NA NA NA NA
## 404 NA NA NA NA NA NA NA NA
## annotation
## 92 hypothetical protein [Eccles virus]
## 93 <NA>
## 94 <NA>
## 267 hypothetical protein [Eccles virus]
## 307 hypothetical protein [Operophtera brumata reovirus]
## 308 hypothetical protein [Operophtera brumata reovirus]
## 321 <NA>
## 322 <NA>
## 323 RNA-dependent RNA polymerase [Operophtera brumata reovirus]
## 325 hypothetical protein [Eccles virus]
## 377 hypothetical protein [Operophtera brumata reovirus]
## 403 <NA>
## 404 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 92 contig_13029_3_1022_- contig_13029 getorf_JV gene 3 1022 . FALSE
## 93 contig_13079_1_1101_+ contig_13079 getorf_JV gene 1 1101 . TRUE
## 94 contig_13079_256_1101_+ contig_13079 getorf_JV gene 256 1101 . TRUE
## 267 contig_21209_85_1032_+ contig_21209 getorf_JV gene 85 1032 . TRUE
## 307 contig_22938_57_3602_+ contig_22938 getorf_JV gene 57 3602 . TRUE
## 308 contig_22971_24_3410_- contig_22971 getorf_JV gene 24 3410 . FALSE
## 321 contig_23185_2_1948_+ contig_23185 getorf_JV gene 2 1948 . TRUE
## 322 contig_23185_404_1948_+ contig_23185 getorf_JV gene 404 1948 . TRUE
## 323 contig_2657_102_4322_+ contig_2657 getorf_JV gene 102 4322 . TRUE
## 325 contig_3022_134_3418_- contig_3022 getorf_JV gene 134 3418 . FALSE
## 377 contig_7060_186_1949_- contig_7060 getorf_JV gene 186 1949 . FALSE
## 403 contig_7972_185_1546_+ contig_7972 getorf_JV gene 185 1546 . TRUE
## 404 contig_7972_2_1546_+ contig_7972 getorf_JV gene 2 1546 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 92 1 1020 1027 AWA82242.1 0.285 305
## 93 2 1101 1102 <NA> NA NA
## 94 1 846 1102 <NA> NA NA
## 267 1 948 1032 AWA82242.1 0.271 316
## 307 1 3546 3604 YP_392502.1 0.291 1075
## 308 1 3387 3427 YP_392503.1 0.238 1061
## 321 2 1947 2148 <NA> NA NA
## 322 1 1545 2148 <NA> NA NA
## 323 1 4221 4389 YP_392501.1 0.356 1352
## 325 1 3285 3517 AWA82240.1 0.376 458
## 377 1 1764 2107 YP_392506.1 0.247 584
## 403 1 1362 1547 <NA> NA NA
## 404 2 1545 1547 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 92 213 0 1 299 72 376 6.256e-28 121
## 93 NA NA NA NA NA NA NA NA
## 94 NA NA NA NA NA NA NA NA
## 267 225 0 5 314 121 436 1.154e-25 114
## 307 757 0 114 1182 88 1162 6.796e-132 458
## 308 775 0 11 1071 86 1103 1.682e-74 280
## 321 NA NA NA NA NA NA NA NA
## 322 NA NA NA NA NA NA NA NA
## 323 831 0 20 1371 45 1336 3.086e-222 736
## 325 280 0 634 1091 614 1062 4.031e-80 297
## 377 432 0 2 585 10 584 7.794e-36 151
## 403 NA NA NA NA NA NA NA NA
## 404 NA NA NA NA NA NA NA NA
## annotation
## 92 hypothetical protein [Eccles virus]
## 93 <NA>
## 94 <NA>
## 267 hypothetical protein [Eccles virus]
## 307 hypothetical protein [Operophtera brumata reovirus]
## 308 hypothetical protein [Operophtera brumata reovirus]
## 321 <NA>
## 322 <NA>
## 323 RNA-dependent RNA polymerase [Operophtera brumata reovirus]
## 325 hypothetical protein [Eccles virus]
## 377 hypothetical protein [Operophtera brumata reovirus]
## 403 <NA>
## 404 <NA>
# Draw the PhyML tree built from the RdRp ORF contig_2657_102_4322_+ and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_2657_102_4322_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Replace the x scale with explicit limits (0 to 9).
p <- p + xlim(0, 9)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the tree as PDF; no width/height is given, so ggsave uses its defaults.
ggsave(
  filename = "../phylogenies/contig_2657_102_4322_+.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Four “unknown” contigs show a perfect association with the other 7 contigs belonging to an apparent reovirus. contig_7972 does not display any similarity with known sequences; however, it contains a nice single ORF. contig_23185 shows similarity with VP5 from Zoersel tick virus (QYV43123.1) based on a blastx on nr (evalue 2e-24, 23% identity). contig_9440 also shows very weak similarity with Zoersel tick virus (VP7, QYV43125.1) based on a blastx on nr (evalue 3e-7, 24% identity). Zoersel tick virus is an unclassified Reoviridae. contig_13079 has no sequence similarity with any public sequence.
Define the corresponding contigs :
# Contig names are built from their numeric ids with the "contig_" prefix.
contig_set <- paste0("contig_", c(21878, 4624, 5982, 2830))
# Contigs kept with this virus but listed separately as unassigned.
contig_set_unassigned <- paste0(
  "contig_",
  c(14848, 15668, 16918, 3406, 14764, 9139, 19603, 15083)
)
# store for later fusion of corresponding lines
virus_list$`Reoviridae1_D.sub|obs` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of both contig sets; the elements of `res` are printed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae1_D.sub|obs"
)
## orf_name seqid source type start end score strand
## 134 contig_14764_63_1277_- contig_14764 getorf_JV gene 63 1277 . FALSE
## 135 contig_14764_63_1283_- contig_14764 getorf_JV gene 63 1283 . FALSE
## 136 contig_14848_255_1109_+ contig_14848 getorf_JV gene 255 1109 . TRUE
## 137 contig_14848_366_1109_+ contig_14848 getorf_JV gene 366 1109 . TRUE
## 139 contig_15083_2_1267_+ contig_15083 getorf_JV gene 2 1267 . TRUE
## 140 contig_15083_56_1267_+ contig_15083 getorf_JV gene 56 1267 . TRUE
## 146 contig_15668_125_1219_- contig_15668 getorf_JV gene 125 1219 . FALSE
## 147 contig_15668_125_1240_- contig_15668 getorf_JV gene 125 1240 . FALSE
## 174 contig_16918_11_1147_+ contig_16918 getorf_JV gene 11 1147 . TRUE
## 175 contig_16918_5_1147_+ contig_16918 getorf_JV gene 5 1147 . TRUE
## 235 contig_19603_1_957_+ contig_19603 getorf_JV gene 1 957 . TRUE
## 236 contig_19603_85_957_+ contig_19603 getorf_JV gene 85 957 . TRUE
## 285 contig_21878_68_1015_+ contig_21878 getorf_JV gene 68 1015 . TRUE
## 324 contig_2830_14_4147_+ contig_2830 getorf_JV gene 14 4147 . TRUE
## 330 contig_3406_3_3689_- contig_3406 getorf_JV gene 3 3689 . FALSE
## 331 contig_3406_3_3692_- contig_3406 getorf_JV gene 3 3692 . FALSE
## 334 contig_4624_11_2908_- contig_4624 getorf_JV gene 11 2908 . FALSE
## 357 contig_5982_2_1774_- contig_5982 getorf_JV gene 2 1774 . FALSE
## 434 contig_9139_104_1768_+ contig_9139 getorf_JV gene 104 1768 . TRUE
## 435 contig_9139_113_1768_+ contig_9139 getorf_JV gene 113 1768 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 134 1 1215 1285 <NA> NA NA
## 135 2 1221 1285 <NA> NA NA
## 136 2 855 1281 <NA> NA NA
## 137 1 744 1281 <NA> NA NA
## 139 2 1266 1269 <NA> NA NA
## 140 1 1212 1269 <NA> NA NA
## 146 1 1095 1240 <NA> NA NA
## 147 2 1116 1240 <NA> NA NA
## 174 1 1137 1182 <NA> NA NA
## 175 2 1143 1182 <NA> NA NA
## 235 2 957 1082 <NA> NA NA
## 236 1 873 1082 <NA> NA NA
## 285 1 948 1016 YP_001111373.1 0.311 299
## 324 1 4134 4308 YP_001111373.1 0.308 1362
## 330 1 3687 3699 <NA> NA NA
## 331 2 3690 3699 <NA> NA NA
## 334 1 2898 2915 NP_620543.1 0.201 951
## 357 1 1773 1794 YP_002790888.1 0.328 582
## 434 2 1665 1770 <NA> NA NA
## 435 1 1656 1770 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 134 NA NA NA NA NA NA NA NA
## 135 NA NA NA NA NA NA NA NA
## 136 NA NA NA NA NA NA NA NA
## 137 NA NA NA NA NA NA NA NA
## 139 NA NA NA NA NA NA NA NA
## 140 NA NA NA NA NA NA NA NA
## 146 NA NA NA NA NA NA NA NA
## 147 NA NA NA NA NA NA NA NA
## 174 NA NA NA NA NA NA NA NA
## 175 NA NA NA NA NA NA NA NA
## 235 NA NA NA NA NA NA NA NA
## 236 NA NA NA NA NA NA NA NA
## 285 198 0 15 302 367 665 4.158e-37 148
## 324 905 0 24 1332 25 1386 7.764e-189 635
## 330 NA NA NA NA NA NA NA NA
## 331 NA NA NA NA NA NA NA NA
## 334 755 0 12 962 16 961 1.370e-24 117
## 357 388 0 13 590 210 791 3.305e-90 315
## 434 NA NA NA NA NA NA NA NA
## 435 NA NA NA NA NA NA NA NA
## annotation
## 134 <NA>
## 135 <NA>
## 136 <NA>
## 137 <NA>
## 139 <NA>
## 140 <NA>
## 146 <NA>
## 147 <NA>
## 174 <NA>
## 175 <NA>
## 235 <NA>
## 236 <NA>
## 285 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 324 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 330 <NA>
## 331 <NA>
## 334 major core protein [Rice dwarf virus]
## 357 minor core protein [Homalodisca vitripennis reovirus]
## 434 <NA>
## 435 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 134 contig_14764_63_1277_- contig_14764 getorf_JV gene 63 1277 . FALSE
## 135 contig_14764_63_1283_- contig_14764 getorf_JV gene 63 1283 . FALSE
## 136 contig_14848_255_1109_+ contig_14848 getorf_JV gene 255 1109 . TRUE
## 137 contig_14848_366_1109_+ contig_14848 getorf_JV gene 366 1109 . TRUE
## 139 contig_15083_2_1267_+ contig_15083 getorf_JV gene 2 1267 . TRUE
## 140 contig_15083_56_1267_+ contig_15083 getorf_JV gene 56 1267 . TRUE
## 146 contig_15668_125_1219_- contig_15668 getorf_JV gene 125 1219 . FALSE
## 147 contig_15668_125_1240_- contig_15668 getorf_JV gene 125 1240 . FALSE
## 174 contig_16918_11_1147_+ contig_16918 getorf_JV gene 11 1147 . TRUE
## 175 contig_16918_5_1147_+ contig_16918 getorf_JV gene 5 1147 . TRUE
## 235 contig_19603_1_957_+ contig_19603 getorf_JV gene 1 957 . TRUE
## 236 contig_19603_85_957_+ contig_19603 getorf_JV gene 85 957 . TRUE
## 285 contig_21878_68_1015_+ contig_21878 getorf_JV gene 68 1015 . TRUE
## 324 contig_2830_14_4147_+ contig_2830 getorf_JV gene 14 4147 . TRUE
## 330 contig_3406_3_3689_- contig_3406 getorf_JV gene 3 3689 . FALSE
## 331 contig_3406_3_3692_- contig_3406 getorf_JV gene 3 3692 . FALSE
## 334 contig_4624_11_2908_- contig_4624 getorf_JV gene 11 2908 . FALSE
## 357 contig_5982_2_1774_- contig_5982 getorf_JV gene 2 1774 . FALSE
## 434 contig_9139_104_1768_+ contig_9139 getorf_JV gene 104 1768 . TRUE
## 435 contig_9139_113_1768_+ contig_9139 getorf_JV gene 113 1768 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 134 1 1215 1285 <NA> NA NA
## 135 2 1221 1285 <NA> NA NA
## 136 2 855 1281 <NA> NA NA
## 137 1 744 1281 <NA> NA NA
## 139 2 1266 1269 <NA> NA NA
## 140 1 1212 1269 <NA> NA NA
## 146 1 1095 1240 <NA> NA NA
## 147 2 1116 1240 <NA> NA NA
## 174 1 1137 1182 <NA> NA NA
## 175 2 1143 1182 <NA> NA NA
## 235 2 957 1082 <NA> NA NA
## 236 1 873 1082 <NA> NA NA
## 285 1 948 1016 YP_001111373.1 0.311 299
## 324 1 4134 4308 YP_001111373.1 0.308 1362
## 330 1 3687 3699 <NA> NA NA
## 331 2 3690 3699 <NA> NA NA
## 334 1 2898 2915 NP_620543.1 0.201 951
## 357 1 1773 1794 YP_002790888.1 0.328 582
## 434 2 1665 1770 <NA> NA NA
## 435 1 1656 1770 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 134 NA NA NA NA NA NA NA NA
## 135 NA NA NA NA NA NA NA NA
## 136 NA NA NA NA NA NA NA NA
## 137 NA NA NA NA NA NA NA NA
## 139 NA NA NA NA NA NA NA NA
## 140 NA NA NA NA NA NA NA NA
## 146 NA NA NA NA NA NA NA NA
## 147 NA NA NA NA NA NA NA NA
## 174 NA NA NA NA NA NA NA NA
## 175 NA NA NA NA NA NA NA NA
## 235 NA NA NA NA NA NA NA NA
## 236 NA NA NA NA NA NA NA NA
## 285 198 0 15 302 367 665 4.158e-37 148
## 324 905 0 24 1332 25 1386 7.764e-189 635
## 330 NA NA NA NA NA NA NA NA
## 331 NA NA NA NA NA NA NA NA
## 334 755 0 12 962 16 961 1.370e-24 117
## 357 388 0 13 590 210 791 3.305e-90 315
## 434 NA NA NA NA NA NA NA NA
## 435 NA NA NA NA NA NA NA NA
## annotation
## 134 <NA>
## 135 <NA>
## 136 <NA>
## 137 <NA>
## 139 <NA>
## 140 <NA>
## 146 <NA>
## 147 <NA>
## 174 <NA>
## 175 <NA>
## 235 <NA>
## 236 <NA>
## 285 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 324 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 330 <NA>
## 331 <NA>
## 334 major core protein [Rice dwarf virus]
## 357 minor core protein [Homalodisca vitripennis reovirus]
## 434 <NA>
## 435 <NA>
# Draw the PhyML tree built from the RdRp ORF contig_2830_14_4147_+ and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_2830_14_4147_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Replace the x scale with explicit limits (0 to 4).
p <- p + xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save the tree as PDF; no width/height is given, so ggsave uses its defaults.
ggsave(
  filename = "../phylogenies/contig_2830_14_4147_+.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
The RdRp encoded by contig_2830_14_4147_+ seems to be complete (it aligns from positions 25 to 1402 with RdRp [Rice gall dwarf virus, ABF67520.1], which is 1458aa long). Surprisingly, however, contig_21878_68_1015_+ also matches the [Rice gall dwarf virus, ABF67520.1] RdRp, but only very partially and overlapping with the previous one (367-665). In fact, contig_21878_68_1015_+ is completely nested within, and almost 100% identical to, contig_2830_14_4147_+ at the nucleotide level. Maybe a subgenomic segment?
“Unknown contigs”: contig_14848 has no significant homology with public db (blastx on nr); contig_15668 has no significant homology with public db (blastx on nr); contig_16918 has no significant homology with public db (blastx on nr); contig_3406 has hits with a reovirus minor outer capsid protein [Thrips tabaci associated reovirus 1] (evalue 2e-7, 19% identity); contig_14764 has no significant homology with public db (blastx on nr); contig_9139 has no significant homology with public db (blastx on nr); contig_19603 has no significant homology with public db (blastx on nr); contig_15083 has no significant homology with public db (blastx on nr).
However, they all show a nice ORF. It looks like a complete reovirus with 12 segments. See file reovirus_D.sub_unassigned.blastx
PS: contig_16274 was initially annotated as Larkfield (100% identity) based on our first mmseqs2 blastx equivalent. Larkfield virus is described in Medd et al. 2018 (from D. suz). It is expected to be a totivirus. However, other parts of the contig do show sequence similarities with partitiviruses (typically composed of two segments). Anyway, it is unclear whether this contig belongs to the reovirus genome. Probably not, in fact.
Define the corresponding contigs :
# Contig names are built from their numeric ids with the "contig_" prefix;
# NA is used when there are no unassigned contigs.
contig_set <- paste0("contig_", c(22700, 16274))
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$`Larkfield_D.sub|obs` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs; the elements of `res` are printed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Larkfield_D.sub|obs"
)
## orf_name seqid source type start end score
## 164 contig_16274_2_151_+ contig_16274 getorf_JV gene 2 151 .
## 165 contig_16274_221_1039_- contig_16274 getorf_JV gene 221 1039 .
## 295 contig_22700_3291_3449_- contig_22700 getorf_JV gene 3291 3449 .
## 296 contig_22700_3448_5490_+ contig_22700 getorf_JV gene 3448 5490 .
## 297 contig_22700_86_3157_+ contig_22700 getorf_JV gene 86 3157 .
## strand phase attributes seq_length subject_id identity alignment_length
## 164 TRUE 1 150 1211 <NA> NA NA
## 165 FALSE 1 819 1211 <NA> NA NA
## 295 FALSE 1 159 5915 <NA> NA NA
## 296 TRUE 1 2043 5915 AWA82248.1 1.000 681
## 297 TRUE 1 3072 5915 AWA82249.1 0.894 1024
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 164 NA NA NA NA NA NA NA NA
## 165 NA NA NA NA NA NA NA NA
## 295 NA NA NA NA NA NA NA NA
## 296 0 0 1 681 203 883 0 1416
## 297 97 0 1 1024 1 919 0 1789
## annotation
## 164 <NA>
## 165 <NA>
## 295 <NA>
## 296 putative RNA-dependent RNA polymerase [Larkfield virus]
## 297 hypothetical protein [Larkfield virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score
## 164 contig_16274_2_151_+ contig_16274 getorf_JV gene 2 151 .
## 165 contig_16274_221_1039_- contig_16274 getorf_JV gene 221 1039 .
## 295 contig_22700_3291_3449_- contig_22700 getorf_JV gene 3291 3449 .
## 296 contig_22700_3448_5490_+ contig_22700 getorf_JV gene 3448 5490 .
## 297 contig_22700_86_3157_+ contig_22700 getorf_JV gene 86 3157 .
## strand phase attributes seq_length subject_id identity alignment_length
## 164 TRUE 1 150 1211 <NA> NA NA
## 165 FALSE 1 819 1211 <NA> NA NA
## 295 FALSE 1 159 5915 <NA> NA NA
## 296 TRUE 1 2043 5915 AWA82248.1 1.000 681
## 297 TRUE 1 3072 5915 AWA82249.1 0.894 1024
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 164 NA NA NA NA NA NA NA NA
## 165 NA NA NA NA NA NA NA NA
## 295 NA NA NA NA NA NA NA NA
## 296 0 0 1 681 203 883 0 1416
## 297 97 0 1 1024 1 919 0 1789
## annotation
## 164 <NA>
## 165 <NA>
## 295 <NA>
## 296 putative RNA-dependent RNA polymerase [Larkfield virus]
## 297 hypothetical protein [Larkfield virus]
Define the corresponding contigs :
# Contigs attributed to this virus; NA is used when there are no unassigned contigs.
contig_set <- c("contig_5374", "contig_12656", "contig_11296")
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list$`Chuviridae3_Tricho` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs. The plot label now matches the virus_list key
# ("Chuviridae3_Tricho"), as in every other section; it previously used the
# unrelated label "Mivirus_tricho".
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Chuviridae3_Tricho"
)
## orf_name seqid source type start end score strand
## 41 contig_11296_262_1518_- contig_11296 getorf_JV gene 262 1518 . FALSE
## 78 contig_12656_58_1335_+ contig_12656 getorf_JV gene 58 1335 . TRUE
## 346 contig_5374_1884_2501_+ contig_5374 getorf_JV gene 1884 2501 . TRUE
## 347 contig_5374_287_1540_+ contig_5374 getorf_JV gene 287 1540 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 41 1 1257 1532 YP_009337906.1 0.309 366
## 78 1 1278 1421 YP_009666257.1 0.340 377
## 346 1 618 2604 <NA> NA NA
## 347 1 1254 2604 YP_009337906.1 0.304 366
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 41 252 0 10 375 37 401 6.158e-53 198
## 78 247 0 6 382 231 605 1.259e-62 227
## 346 NA NA NA NA NA NA NA NA
## 347 253 0 9 374 37 401 6.940e-54 201
## annotation
## 41 hypothetical protein [Hubei chuvirus-like virus 1]
## 78 glycoprotein [Wuchang Cockroach Virus 3]
## 346 <NA>
## 347 hypothetical protein [Hubei chuvirus-like virus 1]
## Saving 7 x 5 in image
res[1]
## [[1]]
Contigs 5374 and 11296 are 95% identical at the protein level; they may correspond to two strains segregating in Trichopria sp.
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 41 contig_11296_262_1518_- contig_11296 getorf_JV gene 262 1518 . FALSE
## 78 contig_12656_58_1335_+ contig_12656 getorf_JV gene 58 1335 . TRUE
## 346 contig_5374_1884_2501_+ contig_5374 getorf_JV gene 1884 2501 . TRUE
## 347 contig_5374_287_1540_+ contig_5374 getorf_JV gene 287 1540 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 41 1 1257 1532 YP_009337906.1 0.309 366
## 78 1 1278 1421 YP_009666257.1 0.340 377
## 346 1 618 2604 <NA> NA NA
## 347 1 1254 2604 YP_009337906.1 0.304 366
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 41 252 0 10 375 37 401 6.158e-53 198
## 78 247 0 6 382 231 605 1.259e-62 227
## 346 NA NA NA NA NA NA NA NA
## 347 253 0 9 374 37 401 6.940e-54 201
## annotation
## 41 hypothetical protein [Hubei chuvirus-like virus 1]
## 78 glycoprotein [Wuchang Cockroach Virus 3]
## 346 <NA>
## 347 hypothetical protein [Hubei chuvirus-like virus 1]
Miviruses belong to the Chuviridae family and have either one or two segments, typically encoding an L protein, a G protein, an N protein and a VP.
Contig 11296 encodes a homolog of a nucleoprotein. Contig 12656 encodes a homolog of a glycoprotein. Contig 5374 (287-1540) encodes a homolog of a nucleoprotein. Contig 5374 (1884-2501) encodes a “hypothetical protein”.
No trace of an RdRp… However, contigs 5374 and 11296 are almost 100% identical at the nucleotide level with:
https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=2792591
see Kafer et al.2019
Define the corresponding contigs :
# Contigs attributed to this virus (names built from the numeric IDs);
# there are no unassigned contigs here.
contig_set <- paste0("contig_", c(6541, 10992, 10108, 10707, 9260, 17877))
contig_set_unassigned <- NA
# Record both contig sets in virus_list so the corresponding lines can be
# fused later.
virus_list[["Phasmaviridae_Tricho"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Phasmaviridae_Tricho"
)
## orf_name seqid source type start end score strand
## 3 contig_10108_65_1651_+ contig_10108 getorf_JV gene 65 1651 . TRUE
## 23 contig_10707_197_1585_+ contig_10707 getorf_JV gene 197 1585 . TRUE
## 29 contig_10992_1_1515_- contig_10992 getorf_JV gene 1 1515 . FALSE
## 213 contig_17877_24_971_+ contig_17877 getorf_JV gene 24 971 . TRUE
## 368 contig_6541_128_289_- contig_6541 getorf_JV gene 128 289 . FALSE
## 369 contig_6541_1813_1992_- contig_6541 getorf_JV gene 1813 1992 . FALSE
## 370 contig_6541_2008_2172_+ contig_6541 getorf_JV gene 2008 2172 . TRUE
## 371 contig_6541_340_1782_+ contig_6541 getorf_JV gene 340 1782 . TRUE
## 444 contig_9260_3_1454_- contig_9260 getorf_JV gene 3 1454 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 3 1 1587 1651 YP_009666981.1 0.584 529
## 23 1 1389 1587 YP_009666981.1 0.411 462
## 29 1 1515 1559 YP_009666981.1 0.363 446
## 213 1 948 1143 YP_009666981.1 0.324 293
## 368 1 162 2229 <NA> NA NA
## 369 1 180 2229 <NA> NA NA
## 370 1 165 2229 <NA> NA NA
## 371 1 1443 2229 YP_009666983.1 0.392 283
## 444 1 1452 1755 YP_009666982.1 0.321 474
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 3 219 0 1 529 541 1068 3.500e-199 629
## 23 269 0 1 462 1179 1636 6.546e-113 376
## 29 282 0 57 499 57 502 5.136e-79 278
## 213 193 0 1 293 1651 1937 1.024e-38 153
## 368 NA NA NA NA NA NA NA NA
## 369 NA NA NA NA NA NA NA NA
## 370 NA NA NA NA NA NA NA NA
## 371 167 0 12 294 11 285 1.841e-56 211
## 444 318 0 9 482 132 601 4.895e-91 313
## annotation
## 3 RNA-dependent RNA polymerase [Ganda bee virus]
## 23 RNA-dependent RNA polymerase [Ganda bee virus]
## 29 RNA-dependent RNA polymerase [Ganda bee virus]
## 213 RNA-dependent RNA polymerase [Ganda bee virus]
## 368 <NA>
## 369 <NA>
## 370 <NA>
## 371 nucleoprotein [Ganda bee virus]
## 444 glycoprotein precursor [Ganda bee virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 3 contig_10108_65_1651_+ contig_10108 getorf_JV gene 65 1651 . TRUE
## 23 contig_10707_197_1585_+ contig_10707 getorf_JV gene 197 1585 . TRUE
## 29 contig_10992_1_1515_- contig_10992 getorf_JV gene 1 1515 . FALSE
## 213 contig_17877_24_971_+ contig_17877 getorf_JV gene 24 971 . TRUE
## 368 contig_6541_128_289_- contig_6541 getorf_JV gene 128 289 . FALSE
## 369 contig_6541_1813_1992_- contig_6541 getorf_JV gene 1813 1992 . FALSE
## 370 contig_6541_2008_2172_+ contig_6541 getorf_JV gene 2008 2172 . TRUE
## 371 contig_6541_340_1782_+ contig_6541 getorf_JV gene 340 1782 . TRUE
## 444 contig_9260_3_1454_- contig_9260 getorf_JV gene 3 1454 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 3 1 1587 1651 YP_009666981.1 0.584 529
## 23 1 1389 1587 YP_009666981.1 0.411 462
## 29 1 1515 1559 YP_009666981.1 0.363 446
## 213 1 948 1143 YP_009666981.1 0.324 293
## 368 1 162 2229 <NA> NA NA
## 369 1 180 2229 <NA> NA NA
## 370 1 165 2229 <NA> NA NA
## 371 1 1443 2229 YP_009666983.1 0.392 283
## 444 1 1452 1755 YP_009666982.1 0.321 474
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 3 219 0 1 529 541 1068 3.500e-199 629
## 23 269 0 1 462 1179 1636 6.546e-113 376
## 29 282 0 57 499 57 502 5.136e-79 278
## 213 193 0 1 293 1651 1937 1.024e-38 153
## 368 NA NA NA NA NA NA NA NA
## 369 NA NA NA NA NA NA NA NA
## 370 NA NA NA NA NA NA NA NA
## 371 167 0 12 294 11 285 1.841e-56 211
## 444 318 0 9 482 132 601 4.895e-91 313
## annotation
## 3 RNA-dependent RNA polymerase [Ganda bee virus]
## 23 RNA-dependent RNA polymerase [Ganda bee virus]
## 29 RNA-dependent RNA polymerase [Ganda bee virus]
## 213 RNA-dependent RNA polymerase [Ganda bee virus]
## 368 <NA>
## 369 <NA>
## 370 <NA>
## 371 nucleoprotein [Ganda bee virus]
## 444 glycoprotein precursor [Ganda bee virus]
Surprisingly, 4 of the contigs encode a RdRp suggesting either that several viruses are present or that the assembly is incomplete. After looking at the blast results, it is clear that the assembly is incomplete leading to fragmented RdRp. We artificially fused the 4 parts (order is : 10992, 10108, 10707 and 17877) which covered the majority of the related protein.
# Build the tree plot for the fused sequence (plot_phylogeny() is defined
# elsewhere) and set the x-axis limits to 0-3.
p <- plot_phylogeny(
  "../phylogenies/contig_10992_1_1515_-_FUSED+with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_10992_1_1515_-_FUSED.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Ganda bee virus genome is composed of three segments: 6453bp for the RdRp coding segment, 2101bp for the glycoprotein precursor (GnGc) gene and 1906bp for the nucleoprotein (N) gene. See fig 3 of Schoonvaere et al. Plos one 2016.
https://www.genome.jp/dbget-bin/www_bget?refseq:NC_043642 https://www.genome.jp/dbget-bin/www_bget?refseq:NC_043643 https://www.genome.jp/dbget-bin/www_bget?refseq:NC_043644
It seems the genome is almost complete, apart from the fact that the RdRp is scattered among 4 contigs.
Contig 9260 encodes the Glycoprotein (M segment), and Contig 6541 encodes the nucleoprotein and other unannotated ORFs (S segment).
Define the corresponding contigs :
# Contigs grouped under the Reoviridae4_A.sp entry.
contig_set <- c(
  "contig_17370", "contig_17755", "contig_6072", "contig_6134",
  "contig_6311", "contig_17655", "contig_11550"
)
# No unassigned contigs for this entry. Defined explicitly so that the
# plot_orfs() call below does not silently reuse whatever value an earlier
# section left in `contig_set_unassigned`.
contig_set_unassigned <- NA
# store for later fusion of corresponding lines
virus_list[["Reoviridae4_A.sp"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Export the selected contig sequences as FASTA.
writeXStringSet(contigs_wta[contig_set], "../sequences/RNA_virus_genomes/Reoviridae4_A.sp.fa")
#writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reovirus Asobara"
)
## orf_name seqid source type start end score strand
## 56 contig_11550_2_160_+ contig_11550 getorf_JV gene 2 160 . TRUE
## 57 contig_11550_53_532_- contig_11550 getorf_JV gene 53 532 . FALSE
## 58 contig_11550_538_1464_- contig_11550 getorf_JV gene 538 1464 . FALSE
## 59 contig_11550_538_1509_- contig_11550 getorf_JV gene 538 1509 . FALSE
## 192 contig_17370_1_1053_- contig_17370 getorf_JV gene 1 1053 . FALSE
## 193 contig_17370_1_1161_- contig_17370 getorf_JV gene 1 1161 . FALSE
## 202 contig_17655_43_1140_- contig_17655 getorf_JV gene 43 1140 . FALSE
## 203 contig_17655_43_1149_- contig_17655 getorf_JV gene 43 1149 . FALSE
## 206 contig_17755_3_1031_+ contig_17755 getorf_JV gene 3 1031 . TRUE
## 207 contig_17755_3_1031_+ contig_17755 getorf_JV gene 3 1031 . TRUE
## 362 contig_6072_2_2275_- contig_6072 getorf_JV gene 2 2275 . FALSE
## 363 contig_6072_2_2359_- contig_6072 getorf_JV gene 2 2359 . FALSE
## 364 contig_6134_46_2247_- contig_6134 getorf_JV gene 46 2247 . FALSE
## 365 contig_6311_2_2083_- contig_6311 getorf_JV gene 2 2083 . FALSE
## 366 contig_6311_2_2287_- contig_6311 getorf_JV gene 2 2287 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 56 2 159 1511 <NA> NA NA
## 57 1 480 1511 <NA> NA NA
## 58 1 927 1511 <NA> NA NA
## 59 2 972 1511 <NA> NA NA
## 192 1 1053 1162 <NA> NA NA
## 193 2 1161 1162 <NA> NA NA
## 202 1 1098 1151 <NA> NA NA
## 203 2 1107 1151 <NA> NA NA
## 206 2 1029 1148 <NA> NA NA
## 207 1 1029 1148 <NA> NA NA
## 362 1 2274 2359 <NA> NA NA
## 363 2 2358 2359 <NA> NA NA
## 364 1 2202 2341 YP_009158901.1 0.233 655
## 365 1 2082 2289 <NA> NA NA
## 366 2 2286 2289 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 56 NA NA NA NA NA NA NA NA
## 57 NA NA NA NA NA NA NA NA
## 58 NA NA NA NA NA NA NA NA
## 59 NA NA NA NA NA NA NA NA
## 192 NA NA NA NA NA NA NA NA
## 193 NA NA NA NA NA NA NA NA
## 202 NA NA NA NA NA NA NA NA
## 203 NA NA NA NA NA NA NA NA
## 206 NA NA NA NA NA NA NA NA
## 207 NA NA NA NA NA NA NA NA
## 362 NA NA NA NA NA NA NA NA
## 363 NA NA NA NA NA NA NA NA
## 364 480 0 17 671 613 1239 5.188e-32 140
## 365 NA NA NA NA NA NA NA NA
## 366 NA NA NA NA NA NA NA NA
## annotation
## 56 <NA>
## 57 <NA>
## 58 <NA>
## 59 <NA>
## 192 <NA>
## 193 <NA>
## 202 <NA>
## 203 <NA>
## 206 <NA>
## 207 <NA>
## 362 <NA>
## 363 <NA>
## 364 RNA-dependent RNA polymerase [Chobar Gorge virus]
## 365 <NA>
## 366 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 56 contig_11550_2_160_+ contig_11550 getorf_JV gene 2 160 . TRUE
## 57 contig_11550_53_532_- contig_11550 getorf_JV gene 53 532 . FALSE
## 58 contig_11550_538_1464_- contig_11550 getorf_JV gene 538 1464 . FALSE
## 59 contig_11550_538_1509_- contig_11550 getorf_JV gene 538 1509 . FALSE
## 192 contig_17370_1_1053_- contig_17370 getorf_JV gene 1 1053 . FALSE
## 193 contig_17370_1_1161_- contig_17370 getorf_JV gene 1 1161 . FALSE
## 202 contig_17655_43_1140_- contig_17655 getorf_JV gene 43 1140 . FALSE
## 203 contig_17655_43_1149_- contig_17655 getorf_JV gene 43 1149 . FALSE
## 206 contig_17755_3_1031_+ contig_17755 getorf_JV gene 3 1031 . TRUE
## 207 contig_17755_3_1031_+ contig_17755 getorf_JV gene 3 1031 . TRUE
## 362 contig_6072_2_2275_- contig_6072 getorf_JV gene 2 2275 . FALSE
## 363 contig_6072_2_2359_- contig_6072 getorf_JV gene 2 2359 . FALSE
## 364 contig_6134_46_2247_- contig_6134 getorf_JV gene 46 2247 . FALSE
## 365 contig_6311_2_2083_- contig_6311 getorf_JV gene 2 2083 . FALSE
## 366 contig_6311_2_2287_- contig_6311 getorf_JV gene 2 2287 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 56 2 159 1511 <NA> NA NA
## 57 1 480 1511 <NA> NA NA
## 58 1 927 1511 <NA> NA NA
## 59 2 972 1511 <NA> NA NA
## 192 1 1053 1162 <NA> NA NA
## 193 2 1161 1162 <NA> NA NA
## 202 1 1098 1151 <NA> NA NA
## 203 2 1107 1151 <NA> NA NA
## 206 2 1029 1148 <NA> NA NA
## 207 1 1029 1148 <NA> NA NA
## 362 1 2274 2359 <NA> NA NA
## 363 2 2358 2359 <NA> NA NA
## 364 1 2202 2341 YP_009158901.1 0.233 655
## 365 1 2082 2289 <NA> NA NA
## 366 2 2286 2289 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 56 NA NA NA NA NA NA NA NA
## 57 NA NA NA NA NA NA NA NA
## 58 NA NA NA NA NA NA NA NA
## 59 NA NA NA NA NA NA NA NA
## 192 NA NA NA NA NA NA NA NA
## 193 NA NA NA NA NA NA NA NA
## 202 NA NA NA NA NA NA NA NA
## 203 NA NA NA NA NA NA NA NA
## 206 NA NA NA NA NA NA NA NA
## 207 NA NA NA NA NA NA NA NA
## 362 NA NA NA NA NA NA NA NA
## 363 NA NA NA NA NA NA NA NA
## 364 480 0 17 671 613 1239 5.188e-32 140
## 365 NA NA NA NA NA NA NA NA
## 366 NA NA NA NA NA NA NA NA
## annotation
## 56 <NA>
## 57 <NA>
## 58 <NA>
## 59 <NA>
## 192 <NA>
## 193 <NA>
## 202 <NA>
## 203 <NA>
## 206 <NA>
## 207 <NA>
## 362 <NA>
## 363 <NA>
## 364 RNA-dependent RNA polymerase [Chobar Gorge virus]
## 365 <NA>
## 366 <NA>
These sequences were grouped together after a later blastx analysis (27/07/2022). The 7 sequences are related to orbi-like viruses (Reoviridae).
Contig 6134 is incomplete: it encodes 734 aa, whereas the closest relative has 1284 aa. Apparently a member of the Reoviridae.
# Build the tree plot for contig_6134 (plot_phylogeny() is defined elsewhere)
# and set the x-axis limits to 0-10.
p <- plot_phylogeny(
  "../phylogenies/contig_6134_46_2247_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 10)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_6134_46_2247_-.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
I built a phylogeny for contig17370 [Serbia reo-like virus 1] using the
sequences from ncbi. Interestingly it is also related to [Hubei odonate
virus 15]:
# Build the tree plot for contig_17370 (plot_phylogeny() is defined elsewhere)
# and set the x-axis limits to 0-5.
p <- plot_phylogeny(
  "../phylogenies/contig_17370-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 5)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_17370.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
see ../TABLES/Reoviridae4_A.sp_blastx_2022_07_27.txt
for details
# Build the tree plot from the Reoviridae_all_nr2 tree file (plot_phylogeny()
# is defined elsewhere) and set the x-axis limits to 0-24.
p <- plot_phylogeny(
  "../phylogenies/Reoviridae_all_nr2-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 24)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_All_reoviruses_non_redundant.pdf", plot = p, width=8, height=10)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Single contig grouped under the Chuviridae2_A.sp entry.
contig_set <- "contig_13728"
# Record the contig in virus_list so the corresponding lines can be fused
# later; no unassigned contigs for this entry.
virus_list[["Chuviridae2_A.sp"]] <- list(
  contig_set = contig_set[1],
  contig_set_unassigned = NA
)
# Export the selected contig sequence as FASTA.
writeXStringSet(
  contigs_wta[contig_set],
  filepath = "../sequences/RNA_virus_genomes/Chuviridae2_A.sp.fa"
)
#writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = NA,
  gff = gff_wta2,
  name = "Chuvirus Asobara"
)
## orf_name seqid source type start end score strand
## 118 contig_13728_25_1296_+ contig_13728 getorf_JV gene 25 1296 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 118 1 1272 1346 YP_009337904.1 0.232 408
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 118 313 0 2 409 1757 2164 5.413e-21 102
## annotation
## 118 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]
## Saving 7 x 5 in image
res[1]
## [[1]]
The RdRp is incomplete for sure (13728): only 424 aa where the closest relative has 2172 aa. Apparently a Chuviridae.
# Build the tree plot for contig_13728 (plot_phylogeny() is defined elsewhere)
# and write it to a PDF next to the tree file.
p <- plot_phylogeny(
  "../phylogenies/contig_13728_25_1296_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
ggsave(
  plot = p,
  filename = "../phylogenies/contig_13728_25_1296__+.pdf"
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs grouped under the Partiti-like2_A.sp entry.
contig_set <- c("contig_14948", "contig_15126")
# Record the contigs in virus_list so the corresponding lines can be fused
# later; no unassigned contigs for this entry.
virus_list[["Partiti-like2_A.sp"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = NA
)
# Export the selected contig sequences as FASTA.
writeXStringSet(
  contigs_wta[contig_set],
  filepath = "../sequences/RNA_virus_genomes/Partiti-like2_A.sp.fa"
)
#writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = NA,
  gff = gff_wta2,
  name = "Partiti Asobara"
)
## orf_name seqid source type start end score strand
## 138 contig_14948_2_1144_- contig_14948 getorf_JV gene 2 1144 . FALSE
## 141 contig_15126_55_1263_- contig_15126 getorf_JV gene 55 1263 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 138 1 1143 1276 YP_052856.2 0.285 308
## 141 1 1209 1267 YP_009329869.1 0.467 371
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 138 214 0 73 372 143 450 7.122e-22 104
## 141 197 0 14 383 81 451 3.725e-115 379
## annotation
## 138 RNA-dependent RNA polymerase [Penicillium stoloniferum virus S]
## 141 RdRp [Beihai barnacle virus 13]
## Saving 7 x 5 in image
res[1]
## [[1]]
Contig 14948 has 381 aa and the closest relative has 539 aa. Apparently a Partitiviridae.
# Build the tree plot for contig_14948 (plot_phylogeny() is defined elsewhere)
# and write it to a PDF next to the tree file.
p <- plot_phylogeny(
  "../phylogenies/contig_14948_2_1144_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
ggsave(
  plot = p,
  filename = "../phylogenies/contig_14948_2_1144_-.pdf"
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Single contig grouped under the Nidoviridae_A.sp entry.
contig_set <- "contig_14619"
# Record the contig in virus_list so the corresponding lines can be fused
# later; no unassigned contigs for this entry.
virus_list[["Nidoviridae_A.sp"]] <- list(
  contig_set = contig_set[1],
  contig_set_unassigned = NA
)
# Export the selected contig sequence as FASTA.
writeXStringSet(
  contigs_wta[contig_set],
  filepath = "../sequences/RNA_virus_genomes/Nidoviridae_A.sp.fa"
)
#writeXStringSet(contigs_wta[contig_set_unassigned], "../sequences/RNA_virus_genomes/Viruses_Asobara_unassigned.fa")
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = NA,
  gff = gff_wta2,
  name = "Nido Asobara"
)
## orf_name seqid source type start end score strand
## 133 contig_14619_145_1134_- contig_14619 getorf_JV gene 145 1134 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 133 1 990 1293 AWA82244.1 0.676 34 11
## gap_opens qstart qend sstart send evalue bitscore
## 133 0 269 302 41 74 5.124e-05 50
## annotation
## 133 hypothetical protein [Fuefuki virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
* Fuefuki-like virus: described by Medd et al. in D. suz. Nidoviridae: non-segmented, ~16 kb, +ssRNA.
# This entry has no assigned contigs; both contigs are listed as unassigned.
contig_set <- NA
contig_set_unassigned <- c("contig_10171", "contig_16255")
# Record both contig sets in virus_list so the corresponding lines can be
# fused later.
virus_list[["Dark2_A.sp"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Dark2_A.sp_Asobara"
)
## orf_name seqid source type start end score strand
## 4 contig_10171_3_1598_+ contig_10171 getorf_JV gene 3 1598 . TRUE
## 5 contig_10171_57_1598_+ contig_10171 getorf_JV gene 57 1598 . TRUE
## 161 contig_16255_2_832_+ contig_16255 getorf_JV gene 2 832 . TRUE
## 162 contig_16255_218_832_+ contig_16255 getorf_JV gene 218 832 . TRUE
## 163 contig_16255_999_1211_- contig_16255 getorf_JV gene 999 1211 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 4 2 1596 1644 <NA> NA NA NA
## 5 1 1542 1644 <NA> NA NA NA
## 161 2 831 1212 <NA> NA NA NA
## 162 1 615 1212 <NA> NA NA NA
## 163 2 213 1212 <NA> NA NA NA
## gap_opens qstart qend sstart send evalue bitscore annotation
## 4 NA NA NA NA NA NA NA <NA>
## 5 NA NA NA NA NA NA NA <NA>
## 161 NA NA NA NA NA NA NA <NA>
## 162 NA NA NA NA NA NA NA <NA>
## 163 NA NA NA NA NA NA NA <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
Define the corresponding contigs :
# Contigs grouped under the Powburn_Dsub|obs entry; none unassigned.
contig_set <- c(
  "contig_22592", "contig_10041", "contig_21669", "contig_17996"
)
contig_set_unassigned <- NA
# Record both contig sets in virus_list so the corresponding lines can be
# fused later.
virus_list[["Powburn_Dsub|obs"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Powburn_Dsub|obs"
)
## orf_name seqid source type start end score strand
## 2 contig_10041_2_1402_- contig_10041 getorf_JV gene 2 1402 . FALSE
## 216 contig_17996_142_1068_- contig_17996 getorf_JV gene 142 1068 . FALSE
## 280 contig_21669_3_524_- contig_21669 getorf_JV gene 3 524 . FALSE
## 281 contig_21669_768_992_+ contig_21669 getorf_JV gene 768 992 . TRUE
## 292 contig_22592_353_7750_- contig_22592 getorf_JV gene 353 7750 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 2 1 1401 1659 AMO03227.1 0.948 467 24
## 216 1 927 1139 AKH40285.1 1.000 196 0
## 280 1 522 1021 AMO03227.1 0.959 174 7
## 281 1 225 1021 <NA> NA NA NA
## 292 1 7398 7911 AMO03227.1 0.951 2466 121
## gap_opens qstart qend sstart send evalue bitscore
## 2 0 1 467 1 467 4.418e-295 902
## 216 0 1 196 2653 2848 3.828e-120 389
## 280 0 1 174 137 310 2.524e-111 355
## 281 NA NA NA NA NA NA NA
## 292 0 1 2466 399 2860 0.000e+00 4779
## annotation
## 2 putative polyprotein [Pow Burn virus]
## 216 putative polyprotein [Thika virus]
## 280 putative polyprotein [Pow Burn virus]
## 281 <NA>
## 292 putative polyprotein [Pow Burn virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
Contig 21669 is 901% [sic: the value exceeds 100%, probably 90.1% or 91%; to be checked] identical to contig 10041 at the nucleotide level.
Maybe a subgenomic fragment?
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 2 contig_10041_2_1402_- contig_10041 getorf_JV gene 2 1402 . FALSE
## 216 contig_17996_142_1068_- contig_17996 getorf_JV gene 142 1068 . FALSE
## 280 contig_21669_3_524_- contig_21669 getorf_JV gene 3 524 . FALSE
## 281 contig_21669_768_992_+ contig_21669 getorf_JV gene 768 992 . TRUE
## 292 contig_22592_353_7750_- contig_22592 getorf_JV gene 353 7750 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 2 1 1401 1659 AMO03227.1 0.948 467 24
## 216 1 927 1139 AKH40285.1 1.000 196 0
## 280 1 522 1021 AMO03227.1 0.959 174 7
## 281 1 225 1021 <NA> NA NA NA
## 292 1 7398 7911 AMO03227.1 0.951 2466 121
## gap_opens qstart qend sstart send evalue bitscore
## 2 0 1 467 1 467 4.418e-295 902
## 216 0 1 196 2653 2848 3.828e-120 389
## 280 0 1 174 137 310 2.524e-111 355
## 281 NA NA NA NA NA NA NA
## 292 0 1 2466 399 2860 0.000e+00 4779
## annotation
## 2 putative polyprotein [Pow Burn virus]
## 216 putative polyprotein [Thika virus]
## 280 putative polyprotein [Pow Burn virus]
## 281 <NA>
## 292 putative polyprotein [Pow Burn virus]
# Gene-arrow map of the ORFs in `tab` (built elsewhere in the file): one facet
# per contig, arrows oriented by `strand`, labelled with the blast annotation,
# and line type mapped to `phase`.
ggplot(
  tab,
  aes(xmin = start, xmax = end, y = seqid, forward = strand, label = annotation)
) +
  geom_gene_arrow(aes(lty = phase)) +
  facet_wrap(~ seqid, scales = "free_y", ncol = 1) +
  geom_gene_label(align = "centre") +
  theme_genes() +
  # White segment drawn from each contig's length (seq_length) out to x = 100000
  # -- presumably to hide the baseline beyond the end of the contig.
  # `linewidth` replaces `size`, which is deprecated for line geoms since
  # ggplot2 3.4.0.
  geom_segment(
    aes(y = seqid, yend = seqid, x = seq_length),
    xend = 100000, colour = "white", linewidth = 2
  )
## Warning: Removed 29 rows containing missing values (`geom_gene_label()`).
Pow Burn virus has a 9,268 bp genomic sequence and has been detected in
Dsub, Dobs, Dsus, Sdef according to Obbard table. Our assembly appears to
be fragmented.
Thika has a 9kb genome also,
https://www.genome.jp/dbget-bin/www_bget?refseq:NC_027127
The polyprotein encoded by contig_22592 contains the following domains :
Domains for contig_22592
Based on RdRp domain, we built the following phylogeny:
# Build the tree plot for contig_22592 (plot_phylogeny() is defined elsewhere)
# and set the x-axis limits to 0-15.
p <- plot_phylogeny(
  "../phylogenies/contig_22592_353_7750_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 15)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_22592_353_7750.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Single contig grouped under the Phenuiviridae_Pachy entry; none unassigned.
contig_set <- "contig_17880"
contig_set_unassigned <- NA
# Record both contig sets in virus_list so the corresponding lines can be
# fused later.
virus_list[["Phenuiviridae_Pachy"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Phenuiviridae_Pachy"
)
## orf_name seqid source type start end score strand
## 214 contig_17880_3_983_- contig_17880 getorf_JV gene 3 983 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 214 1 981 1143 YP_009664616.1 0.329 333
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 214 219 0 1 327 166 498 1.434e-52 194
## annotation
## 214 glycoprotein G [Cumuto virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 214 contig_17880_3_983_- contig_17880 getorf_JV gene 3 983 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 214 1 981 1143 YP_009664616.1 0.329 333
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 214 219 0 1 327 166 498 1.434e-52 194
## annotation
## 214 glycoprotein G [Cumuto virus]
Goukoviruses are expected to have 3 segments (1.1kb, 6.4kb and 3.2kb). They infect insects. https://viralzone.expasy.org/7102
The protein is most likely incomplete (327 aa versus ~1000 aa for related sequences). Nevertheless, we built the following phylogeny:
# Build the tree plot for contig_17880 (plot_phylogeny() is defined elsewhere)
# and set the x-axis limits to 0-10.
p <- plot_phylogeny(
  "../phylogenies/contig_17880_3_983_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 10)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_17880_3_983.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs grouped under the Reoviridae3_L.sp entry, plus two contigs kept as
# unassigned (names built from the numeric IDs).
contig_set <- paste0(
  "contig_",
  c(21672, 14400, 6047, 22214, 5748, 22929, 3046, 3854, 8099, 13150)
)
contig_set_unassigned <- paste0("contig_", c(17745, 19572))
# Record both contig sets in virus_list so the corresponding lines can be
# fused later.
virus_list[["Reoviridae3_L.sp"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae3_L.sp"
)
## orf_name seqid source type start end score strand
## 96 contig_13150_22_1296_- contig_13150 getorf_JV gene 22 1296 . FALSE
## 130 contig_14400_288_1076_+ contig_14400 getorf_JV gene 288 1076 . TRUE
## 204 contig_17745_53_1114_+ contig_17745 getorf_JV gene 53 1114 . TRUE
## 205 contig_17745_74_1114_+ contig_17745 getorf_JV gene 74 1114 . TRUE
## 233 contig_19572_109_1065_+ contig_19572 getorf_JV gene 109 1065 . TRUE
## 234 contig_19572_157_1065_+ contig_19572 getorf_JV gene 157 1065 . TRUE
## 282 contig_21672_1_723_- contig_21672 getorf_JV gene 1 723 . FALSE
## 289 contig_22214_228_890_+ contig_22214 getorf_JV gene 228 890 . TRUE
## 305 contig_22929_193_351_+ contig_22929 getorf_JV gene 193 351 . TRUE
## 306 contig_22929_630_3638_+ contig_22929 getorf_JV gene 630 3638 . TRUE
## 326 contig_3046_17_3883_- contig_3046 getorf_JV gene 17 3883 . FALSE
## 333 contig_3854_122_3259_+ contig_3854 getorf_JV gene 122 3259 . TRUE
## 355 contig_5748_2246_2413_+ contig_5748 getorf_JV gene 2246 2413 . TRUE
## 356 contig_5748_44_2227_+ contig_5748 getorf_JV gene 44 2227 . TRUE
## 359 contig_6047_255_2162_+ contig_6047 getorf_JV gene 255 2162 . TRUE
## 405 contig_8099_1260_1814_- contig_8099 getorf_JV gene 1260 1814 . FALSE
## 406 contig_8099_78_1205_- contig_8099 getorf_JV gene 78 1205 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 96 1 1275 1387 YP_009059068.1 0.250 415
## 130 1 789 1307 YP_009059071.1 0.340 233
## 204 2 1062 1148 <NA> NA NA
## 205 1 1041 1148 <NA> NA NA
## 233 2 957 1083 <NA> NA NA
## 234 1 909 1083 <NA> NA NA
## 282 1 723 1021 YP_009059071.1 0.329 229
## 289 1 663 1005 YP_009059077.1 0.408 224
## 305 1 159 3742 <NA> NA NA
## 306 1 3009 3742 YP_009059073.1 0.403 1003
## 326 1 3867 4024 YP_009072449.1 0.470 1269
## 333 1 3138 3344 YP_009059074.1 0.322 1048
## 355 1 168 2466 <NA> NA NA
## 356 1 2184 2466 YP_009059076.1 0.264 673
## 359 1 1908 2178 YP_009059075.1 0.501 631
## 405 1 555 1917 <NA> NA NA
## 406 1 1128 1917 YP_009059067.1 0.253 304
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 96 295 0 18 411 79 493 3.846e-32 136
## 130 149 0 20 245 5 237 1.592e-26 115
## 204 NA NA NA NA NA NA NA NA
## 205 NA NA NA NA NA NA NA NA
## 233 NA NA NA NA NA NA NA NA
## 234 NA NA NA NA NA NA NA NA
## 282 148 0 20 241 5 233 2.174e-24 108
## 289 128 0 1 218 350 573 1.191e-43 163
## 305 NA NA NA NA NA NA NA NA
## 306 596 0 2 1001 196 1198 6.704e-261 832
## 326 671 0 17 1285 66 1332 0.000e+00 1154
## 333 702 0 4 1040 4 1051 8.497e-170 567
## 355 NA NA NA NA NA NA NA NA
## 356 472 0 87 728 39 711 6.071e-53 207
## 359 314 0 1 631 99 728 9.422e-182 584
## 405 NA NA NA NA NA NA NA NA
## 406 225 0 73 376 279 580 5.766e-19 95
## annotation
## 96 hypothetical protein [Cimodo virus]
## 130 hypothetical protein [Cimodo virus]
## 204 <NA>
## 205 <NA>
## 233 <NA>
## 234 <NA>
## 282 hypothetical protein [Cimodo virus]
## 289 NTP-binding domain protein [Cimodo virus]
## 305 <NA>
## 306 hypothetical protein [Cimodo virus]
## 326 RNA-dependent RNA polymerase [Cimodo virus]
## 333 hypothetical protein [Cimodo virus]
## 355 <NA>
## 356 hypothetical protein [Cimodo virus]
## 359 hypothetical protein [Cimodo virus]
## 405 <NA>
## 406 hypothetical protein [Cimodo virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 96 contig_13150_22_1296_- contig_13150 getorf_JV gene 22 1296 . FALSE
## 130 contig_14400_288_1076_+ contig_14400 getorf_JV gene 288 1076 . TRUE
## 204 contig_17745_53_1114_+ contig_17745 getorf_JV gene 53 1114 . TRUE
## 205 contig_17745_74_1114_+ contig_17745 getorf_JV gene 74 1114 . TRUE
## 233 contig_19572_109_1065_+ contig_19572 getorf_JV gene 109 1065 . TRUE
## 234 contig_19572_157_1065_+ contig_19572 getorf_JV gene 157 1065 . TRUE
## 282 contig_21672_1_723_- contig_21672 getorf_JV gene 1 723 . FALSE
## 289 contig_22214_228_890_+ contig_22214 getorf_JV gene 228 890 . TRUE
## 305 contig_22929_193_351_+ contig_22929 getorf_JV gene 193 351 . TRUE
## 306 contig_22929_630_3638_+ contig_22929 getorf_JV gene 630 3638 . TRUE
## 326 contig_3046_17_3883_- contig_3046 getorf_JV gene 17 3883 . FALSE
## 333 contig_3854_122_3259_+ contig_3854 getorf_JV gene 122 3259 . TRUE
## 355 contig_5748_2246_2413_+ contig_5748 getorf_JV gene 2246 2413 . TRUE
## 356 contig_5748_44_2227_+ contig_5748 getorf_JV gene 44 2227 . TRUE
## 359 contig_6047_255_2162_+ contig_6047 getorf_JV gene 255 2162 . TRUE
## 405 contig_8099_1260_1814_- contig_8099 getorf_JV gene 1260 1814 . FALSE
## 406 contig_8099_78_1205_- contig_8099 getorf_JV gene 78 1205 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 96 1 1275 1387 YP_009059068.1 0.250 415
## 130 1 789 1307 YP_009059071.1 0.340 233
## 204 2 1062 1148 <NA> NA NA
## 205 1 1041 1148 <NA> NA NA
## 233 2 957 1083 <NA> NA NA
## 234 1 909 1083 <NA> NA NA
## 282 1 723 1021 YP_009059071.1 0.329 229
## 289 1 663 1005 YP_009059077.1 0.408 224
## 305 1 159 3742 <NA> NA NA
## 306 1 3009 3742 YP_009059073.1 0.403 1003
## 326 1 3867 4024 YP_009072449.1 0.470 1269
## 333 1 3138 3344 YP_009059074.1 0.322 1048
## 355 1 168 2466 <NA> NA NA
## 356 1 2184 2466 YP_009059076.1 0.264 673
## 359 1 1908 2178 YP_009059075.1 0.501 631
## 405 1 555 1917 <NA> NA NA
## 406 1 1128 1917 YP_009059067.1 0.253 304
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 96 295 0 18 411 79 493 3.846e-32 136
## 130 149 0 20 245 5 237 1.592e-26 115
## 204 NA NA NA NA NA NA NA NA
## 205 NA NA NA NA NA NA NA NA
## 233 NA NA NA NA NA NA NA NA
## 234 NA NA NA NA NA NA NA NA
## 282 148 0 20 241 5 233 2.174e-24 108
## 289 128 0 1 218 350 573 1.191e-43 163
## 305 NA NA NA NA NA NA NA NA
## 306 596 0 2 1001 196 1198 6.704e-261 832
## 326 671 0 17 1285 66 1332 0.000e+00 1154
## 333 702 0 4 1040 4 1051 8.497e-170 567
## 355 NA NA NA NA NA NA NA NA
## 356 472 0 87 728 39 711 6.071e-53 207
## 359 314 0 1 631 99 728 9.422e-182 584
## 405 NA NA NA NA NA NA NA NA
## 406 225 0 73 376 279 580 5.766e-19 95
## annotation
## 96 hypothetical protein [Cimodo virus]
## 130 hypothetical protein [Cimodo virus]
## 204 <NA>
## 205 <NA>
## 233 <NA>
## 234 <NA>
## 282 hypothetical protein [Cimodo virus]
## 289 NTP-binding domain protein [Cimodo virus]
## 305 <NA>
## 306 hypothetical protein [Cimodo virus]
## 326 RNA-dependent RNA polymerase [Cimodo virus]
## 333 hypothetical protein [Cimodo virus]
## 355 <NA>
## 356 hypothetical protein [Cimodo virus]
## 359 hypothetical protein [Cimodo virus]
## 405 <NA>
## 406 hypothetical protein [Cimodo virus]
Cimodo virus has a 12-segmented genome https://www.genome.jp/virushostdb/1427476
Two contigs without homology are included because they co-occur with the other 10 segments (17745 and 18322; note that they are 93% identical at the nucleotide level). Each of them contains a single ORF.
We built a phylogeny based on putative RdRp:
# Build the tree plot for contig_3046 (plot_phylogeny() is defined elsewhere)
# and set the x-axis limits to 0-8.
p <- plot_phylogeny(
  "../phylogenies/contig_3046_17_3883_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 8)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_3046.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
It is impossible to ascribe the unassigned contigs to either Bloomfield or Cimodo virus.
Define the corresponding contigs :
# Contigs grouped under the Reoviridae7 entry (names built from the numeric
# IDs); the unassigned set is currently disabled and left as NA.
contig_set <- paste0("contig_", c(8808, 4942, 8318, 8787, 4957))
contig_set_unassigned <- NA
#contig_set_unassigned=paste0("contig_", c( 20236, 7428,3260, 18191, 10916, 7503, 16163))
# Record both contig sets in virus_list so the corresponding lines can be
# fused later.
virus_list[["Reoviridae7"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs() is defined elsewhere in the file; its result is kept in `res`
# and displayed below.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae7"
)
## orf_name seqid source type start end score strand
## 340 contig_4942_3_2741_- contig_4942 getorf_JV gene 3 2741 . FALSE
## 341 contig_4957_2_2599_- contig_4957 getorf_JV gene 2 2599 . FALSE
## 342 contig_4957_2_2626_- contig_4957 getorf_JV gene 2 2626 . FALSE
## 420 contig_8318_119_1861_- contig_8318 getorf_JV gene 119 1861 . FALSE
## 423 contig_8787_83_1774_- contig_8787 getorf_JV gene 83 1774 . FALSE
## 425 contig_8808_1_1716_- contig_8808 getorf_JV gene 1 1716 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 340 1 2739 2779 AKH40310.1 0.332 904 597
## 341 1 2598 2772 <NA> NA NA NA
## 342 2 2625 2772 <NA> NA NA NA
## 420 1 1743 1883 AKH40314.1 0.285 239 167
## 423 1 1692 1813 AKH40315.1 0.290 508 335
## 425 1 1716 1812 AKH40312.1 0.267 215 152
## gap_opens qstart qend sstart send evalue bitscore
## 340 0 18 912 98 1001 1.698e-140 475
## 341 NA NA NA NA NA NA NA
## 342 NA NA NA NA NA NA NA
## 420 0 337 570 432 670 1.828e-16 89
## 423 0 12 519 29 501 9.100e-48 187
## 425 0 65 273 63 277 4.012e-08 62
## annotation
## 340 putative polymerase [Bloomfield virus]
## 341 <NA>
## 342 <NA>
## 420 ORF1 [Bloomfield virus]
## 423 ORF1 [Bloomfield virus]
## 425 ORF1 [Bloomfield virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
contig 4957 => putative major core protein [Bloomfield virus] (Sequence ID: AKH40311.1, Length: 1228), full length. Contigs 20236, 7428, 3260, 18191 => no hit.
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 340 contig_4942_3_2741_- contig_4942 getorf_JV gene 3 2741 . FALSE
## 341 contig_4957_2_2599_- contig_4957 getorf_JV gene 2 2599 . FALSE
## 342 contig_4957_2_2626_- contig_4957 getorf_JV gene 2 2626 . FALSE
## 420 contig_8318_119_1861_- contig_8318 getorf_JV gene 119 1861 . FALSE
## 423 contig_8787_83_1774_- contig_8787 getorf_JV gene 83 1774 . FALSE
## 425 contig_8808_1_1716_- contig_8808 getorf_JV gene 1 1716 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 340 1 2739 2779 AKH40310.1 0.332 904 597
## 341 1 2598 2772 <NA> NA NA NA
## 342 2 2625 2772 <NA> NA NA NA
## 420 1 1743 1883 AKH40314.1 0.285 239 167
## 423 1 1692 1813 AKH40315.1 0.290 508 335
## 425 1 1716 1812 AKH40312.1 0.267 215 152
## gap_opens qstart qend sstart send evalue bitscore
## 340 0 18 912 98 1001 1.698e-140 475
## 341 NA NA NA NA NA NA NA
## 342 NA NA NA NA NA NA NA
## 420 0 337 570 432 670 1.828e-16 89
## 423 0 12 519 29 501 9.100e-48 187
## 425 0 65 273 63 277 4.012e-08 62
## annotation
## 340 putative polymerase [Bloomfield virus]
## 341 <NA>
## 342 <NA>
## 420 ORF1 [Bloomfield virus]
## 423 ORF1 [Bloomfield virus]
## 425 ORF1 [Bloomfield virus]
# # some of the contigs do have ORFs but incomplete (lacking either start, stop or both). To get them, run the script ../../scripts_annotation/orf_prediction.R with option 0 for getorf to predict them.
# gff_unassigned_option0=read.table("../sequences/RNA_virus_genomes/Bloomfield-like_Lepto_Dkun_Pachy_unassigned.gff", header=FALSE)
# names(gff_unassigned_option0)=c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "seq_length")
# # use phase column to indicate that the orf has been predicted with getorf option 0 (instead of 1)
# gff_unassigned_option0$phase="*"
# orf_names=paste(gff_unassigned_option0$seqid, gff_unassigned_option0$start, gff_unassigned_option0$end, gff_unassigned_option0$strand, sep="_")
# orf_names=sub(pattern = "TRUE", "+", x = orf_names)
# orf_names=sub(pattern = "FALSE", "-", x = orf_names)
# gff_unassigned_option0$orf_name=orf_names
#
# # join the tables
# tab=full_join(tab,gff_unassigned_option0)
# tab
#tab$phase=as.factor(x = tab$phase)
#levels(tab$phase)=c(2,1)
# add annotation for
#tab[tab$orf_name=="contig_4957_2_2626_-",]$annotation="putative major core protein [Bloomfield virus]"
We built a phylogeny based on putative RdRp:
# Tree for the putative RdRp ORF of contig_4942 and its homologs, drawn with
# the x axis restricted to 0-14.
p <- plot_phylogeny(
  "../phylogenies/contig_4942_3_2741_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 14)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_4942_3_2741.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs grouped under "Reoviridae6"; no unassigned contigs are attached (NA).
contig_set <- paste0("contig_", c(5007, 6823, 8100))
contig_set_unassigned <- NA
# Alternative definition, currently disabled:
# contig_set_unassigned <- paste0("contig_", c(16163, 10916, 19572, 7503, 12982, 7433))

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Reoviridae6 <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae6"
)
## orf_name seqid source type start end score strand
## 344 contig_5007_80_2752_+ contig_5007 getorf_JV gene 80 2752 . TRUE
## 372 contig_6823_34_2163_+ contig_6823 getorf_JV gene 34 2163 . TRUE
## 407 contig_8100_17_1819_- contig_8100 getorf_JV gene 17 1819 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 344 1 2673 2752 YP_009059074.1 0.218 835
## 372 1 2130 2163 YP_009072449.1 0.275 515
## 407 1 1803 1917 YP_009072449.1 0.250 539
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 344 629 0 13 817 119 953 1.102e-11 74
## 372 364 0 127 641 126 628 5.960e-41 169
## 407 398 0 20 550 750 1288 8.838e-37 154
## annotation
## 344 hypothetical protein [Cimodo virus]
## 372 RNA-dependent RNA polymerase [Cimodo virus]
## 407 RNA-dependent RNA polymerase [Cimodo virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 344 contig_5007_80_2752_+ contig_5007 getorf_JV gene 80 2752 . TRUE
## 372 contig_6823_34_2163_+ contig_6823 getorf_JV gene 34 2163 . TRUE
## 407 contig_8100_17_1819_- contig_8100 getorf_JV gene 17 1819 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 344 1 2673 2752 YP_009059074.1 0.218 835
## 372 1 2130 2163 YP_009072449.1 0.275 515
## 407 1 1803 1917 YP_009072449.1 0.250 539
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 344 629 0 13 817 119 953 1.102e-11 74
## 372 364 0 127 641 126 628 5.960e-41 169
## 407 398 0 20 550 750 1288 8.838e-37 154
## annotation
## 344 hypothetical protein [Cimodo virus]
## 372 RNA-dependent RNA polymerase [Cimodo virus]
## 407 RNA-dependent RNA polymerase [Cimodo virus]
Contig_7503 has a very weak hit (10e-5) with a cimodo-like virus: MAG: hypothetical protein [Diaphorina citri cimodo-like virus] (Sequence ID: QXG83186.1, Length: 700). The other unassigned contigs have no hit.
The RdRp protein is split in two contigs (contig 6823 for the first part and 8100 for the C-terminal).
We built a phylogeny based on the fusion of both parts of the RdRp:
# Tree read from the contig_8100 ORF file (with homologs), drawn with the
# x axis restricted to 0-8.
p <- plot_phylogeny(
  "../phylogenies/contig_8100_17_1819_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 8)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_8100_17_1819.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Single contig grouped under "Qinviridae_L.h"; no unassigned contigs (NA).
contig_set <- paste0("contig_", 2030)
contig_set_unassigned <- NA

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Qinviridae_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on this contig with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Qinviridae_L.h"
)
## orf_name seqid source type start end score strand
## 257 contig_2030_53_5533_+ contig_2030 getorf_JV gene 53 5533 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 257 1 5481 5586 YP_009342465.1 0.382 1645
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 257 1006 0 1 1645 1 1629 0 1096
## annotation
## 257 RNA-dependent RNA polymerase [Wuhan insect virus 15]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 257 contig_2030_53_5533_+ contig_2030 getorf_JV gene 53 5533 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 257 1 5481 5586 YP_009342465.1 0.382 1645
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 257 1006 0 1 1645 1 1629 0 1096
## annotation
## 257 RNA-dependent RNA polymerase [Wuhan insect virus 15]
A single contig is found, where we expect a bisegmented genome (1601bp => hypo prot; 5889 bp=> RdRp). It contains a full length RdRp protein.
https://www.genome.jp/virushostdb/1923719
We built a phylogeny based on the full-length RdRp:
# Tree for the RdRp ORF of contig_2030 and its homologs, drawn with the
# x axis restricted to 0-14.
p <- plot_phylogeny(
  "../phylogenies/contig_2030_53_5533_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 14)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_2030.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs grouped under "Iflaviridae_L.h"; no unassigned contigs (NA).
contig_set <- paste0("contig_", c(18281, 17904))
contig_set_unassigned <- NA

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Iflaviridae_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 table; keep the result.
# NOTE(review): the plot name ("Formica_Lh") differs from the virus_list key
# ("Iflaviridae_L.h"), unlike the other sections -- confirm this is intended.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Formica_Lh"
)
## orf_name seqid source type start end score strand
## 215 contig_17904_3_1124_- contig_17904 getorf_JV gene 3 1124 . FALSE
## 221 contig_18281_88_1095_- contig_18281 getorf_JV gene 88 1095 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 215 1 1122 1142 YP_008888537.1 0.378 307
## 221 1 1008 1127 YP_008888537.1 0.411 243
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 215 190 0 2 308 1445 1751 1.674e-58 213
## 221 142 0 93 335 864 1105 3.409e-54 199
## annotation
## 215 polyprotein [Formica exsecta virus 2]
## 221 polyprotein [Formica exsecta virus 2]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 215 contig_17904_3_1124_- contig_17904 getorf_JV gene 3 1124 . FALSE
## 221 contig_18281_88_1095_- contig_18281 getorf_JV gene 88 1095 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 215 1 1122 1142 YP_008888537.1 0.378 307
## 221 1 1008 1127 YP_008888537.1 0.411 243
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 215 190 0 2 308 1445 1751 1.674e-58 213
## 221 142 0 93 335 864 1105 3.409e-54 199
## annotation
## 215 polyprotein [Formica exsecta virus 2]
## 221 polyprotein [Formica exsecta virus 2]
Formica exsecta virus has a 9160bp genome encoding a polyprotein.
https://www.genome.jp/dbget-bin/www_bget?refseq:NC_023022
The genome here is thus incomplete for sure. Dhaygude et al. PeerJ 2019
Domains for contig_17904
Domains for contig_18281
In the absence of RdRp in our sequences, we built a phylogeny based on the capsid domain :
# Tree built from the capsid domain ORF of contig_18281 and its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_18281_88_1095_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# x-axis restriction currently disabled:
# p <- p + xlim(0, 8)

# Write the tree to a PDF of width 8 and height 10.
ggsave(
  filename = "../phylogenies/contig_18281_88_1095.pdf",
  plot = p,
  width = 8,
  height = 10
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs grouped under "Quenyavirus_L.h", plus three contigs kept as
# unassigned.
contig_set <- paste0(
  "contig_",
  c(8520, 13838, 19523, 9814, 9238, 6934, 9023, 10017)
)
contig_set_unassigned <- paste0("contig_", c(13351, 9049, 8831))

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Quenyavirus_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Quenyavirus_L.h"
)
## orf_name seqid source type start end score strand
## 1 contig_10017_205_1635_- contig_10017 getorf_JV gene 205 1635 . FALSE
## 109 contig_13351_1_159_- contig_13351 getorf_JV gene 1 159 . FALSE
## 110 contig_13351_1_180_- contig_13351 getorf_JV gene 1 180 . FALSE
## 111 contig_13351_306_1307_- contig_13351 getorf_JV gene 306 1307 . FALSE
## 112 contig_13351_306_1370_- contig_13351 getorf_JV gene 306 1370 . FALSE
## 121 contig_13838_287_1228_- contig_13838 getorf_JV gene 287 1228 . FALSE
## 231 contig_19523_74_730_+ contig_19523 getorf_JV gene 74 730 . TRUE
## 232 contig_19523_857_1084_+ contig_19523 getorf_JV gene 857 1084 . TRUE
## 375 contig_6934_21_2027_+ contig_6934 getorf_JV gene 21 2027 . TRUE
## 421 contig_8520_33_1451_+ contig_8520 getorf_JV gene 33 1451 . TRUE
## 426 contig_8831_14_1726_+ contig_8831 getorf_JV gene 14 1726 . TRUE
## 427 contig_8831_2_1726_+ contig_8831 getorf_JV gene 2 1726 . TRUE
## 429 contig_9023_366_1742_- contig_9023 getorf_JV gene 366 1742 . FALSE
## 432 contig_9049_2_1744_+ contig_9049 getorf_JV gene 2 1744 . TRUE
## 433 contig_9049_20_1744_+ contig_9049 getorf_JV gene 20 1744 . TRUE
## 442 contig_9238_15_1424_+ contig_9238 getorf_JV gene 15 1424 . TRUE
## 443 contig_9238_1569_1757_+ contig_9238 getorf_JV gene 1569 1757 . TRUE
## 469 contig_9814_148_1650_- contig_9814 getorf_JV gene 148 1650 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 1 1 1431 1662 AVB77242.1 0.665 476 158
## 109 1 159 1372 <NA> NA NA NA
## 110 2 180 1372 <NA> NA NA NA
## 111 1 1002 1372 <NA> NA NA NA
## 112 2 1065 1372 <NA> NA NA NA
## 121 1 942 1339 <NA> NA NA NA
## 231 1 657 1084 AVB77242.1 0.339 213 139
## 232 1 228 1084 <NA> NA NA NA
## 375 1 2007 2138 AYU49214.1 0.744 670 171
## 421 1 1419 1850 AVB77240.1 0.346 312 193
## 426 1 1713 1809 <NA> NA NA NA
## 427 2 1725 1809 <NA> NA NA NA
## 429 1 1377 1784 AVB77241.1 0.512 453 216
## 432 2 1743 1781 <NA> NA NA NA
## 433 1 1725 1781 <NA> NA NA NA
## 442 1 1410 1758 AVB77240.1 0.559 450 193
## 443 1 189 1758 <NA> NA NA NA
## 469 1 1503 1686 AVB77239.1 0.667 502 167
## gap_opens qstart qend sstart send evalue bitscore
## 1 0 1 476 1 474 9.200e-212 662
## 109 NA NA NA NA NA NA NA
## 110 NA NA NA NA NA NA NA
## 111 NA NA NA NA NA NA NA
## 112 NA NA NA NA NA NA NA
## 121 NA NA NA NA NA NA NA
## 231 0 2 212 259 471 2.999e-27 115
## 232 NA NA NA NA NA NA NA
## 375 0 1 669 1 670 0.000e+00 1050
## 421 0 41 336 4 315 2.908e-49 189
## 426 NA NA NA NA NA NA NA
## 427 NA NA NA NA NA NA NA
## 429 0 7 459 5 448 5.129e-132 431
## 432 NA NA NA NA NA NA NA
## 433 NA NA NA NA NA NA NA
## 442 0 4 453 1 439 5.578e-158 507
## 443 NA NA NA NA NA NA NA
## 469 0 1 501 1 502 2.685e-233 726
## annotation
## 1 hypothetical protein [Kwi virus]
## 109 <NA>
## 110 <NA>
## 111 <NA>
## 112 <NA>
## 121 <NA>
## 231 hypothetical protein [Kwi virus]
## 232 <NA>
## 375 putative RNA polymerase [Kwi virus]
## 421 hypothetical protein [Kwi virus]
## 426 <NA>
## 427 <NA>
## 429 hypothetical protein [Kwi virus]
## 432 <NA>
## 433 <NA>
## 442 hypothetical protein [Kwi virus]
## 443 <NA>
## 469 hypothetical protein [Kwi virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 1 contig_10017_205_1635_- contig_10017 getorf_JV gene 205 1635 . FALSE
## 109 contig_13351_1_159_- contig_13351 getorf_JV gene 1 159 . FALSE
## 110 contig_13351_1_180_- contig_13351 getorf_JV gene 1 180 . FALSE
## 111 contig_13351_306_1307_- contig_13351 getorf_JV gene 306 1307 . FALSE
## 112 contig_13351_306_1370_- contig_13351 getorf_JV gene 306 1370 . FALSE
## 121 contig_13838_287_1228_- contig_13838 getorf_JV gene 287 1228 . FALSE
## 231 contig_19523_74_730_+ contig_19523 getorf_JV gene 74 730 . TRUE
## 232 contig_19523_857_1084_+ contig_19523 getorf_JV gene 857 1084 . TRUE
## 375 contig_6934_21_2027_+ contig_6934 getorf_JV gene 21 2027 . TRUE
## 421 contig_8520_33_1451_+ contig_8520 getorf_JV gene 33 1451 . TRUE
## 426 contig_8831_14_1726_+ contig_8831 getorf_JV gene 14 1726 . TRUE
## 427 contig_8831_2_1726_+ contig_8831 getorf_JV gene 2 1726 . TRUE
## 429 contig_9023_366_1742_- contig_9023 getorf_JV gene 366 1742 . FALSE
## 432 contig_9049_2_1744_+ contig_9049 getorf_JV gene 2 1744 . TRUE
## 433 contig_9049_20_1744_+ contig_9049 getorf_JV gene 20 1744 . TRUE
## 442 contig_9238_15_1424_+ contig_9238 getorf_JV gene 15 1424 . TRUE
## 443 contig_9238_1569_1757_+ contig_9238 getorf_JV gene 1569 1757 . TRUE
## 469 contig_9814_148_1650_- contig_9814 getorf_JV gene 148 1650 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 1 1 1431 1662 AVB77242.1 0.665 476 158
## 109 1 159 1372 <NA> NA NA NA
## 110 2 180 1372 <NA> NA NA NA
## 111 1 1002 1372 <NA> NA NA NA
## 112 2 1065 1372 <NA> NA NA NA
## 121 1 942 1339 <NA> NA NA NA
## 231 1 657 1084 AVB77242.1 0.339 213 139
## 232 1 228 1084 <NA> NA NA NA
## 375 1 2007 2138 AYU49214.1 0.744 670 171
## 421 1 1419 1850 AVB77240.1 0.346 312 193
## 426 1 1713 1809 <NA> NA NA NA
## 427 2 1725 1809 <NA> NA NA NA
## 429 1 1377 1784 AVB77241.1 0.512 453 216
## 432 2 1743 1781 <NA> NA NA NA
## 433 1 1725 1781 <NA> NA NA NA
## 442 1 1410 1758 AVB77240.1 0.559 450 193
## 443 1 189 1758 <NA> NA NA NA
## 469 1 1503 1686 AVB77239.1 0.667 502 167
## gap_opens qstart qend sstart send evalue bitscore
## 1 0 1 476 1 474 9.200e-212 662
## 109 NA NA NA NA NA NA NA
## 110 NA NA NA NA NA NA NA
## 111 NA NA NA NA NA NA NA
## 112 NA NA NA NA NA NA NA
## 121 NA NA NA NA NA NA NA
## 231 0 2 212 259 471 2.999e-27 115
## 232 NA NA NA NA NA NA NA
## 375 0 1 669 1 670 0.000e+00 1050
## 421 0 41 336 4 315 2.908e-49 189
## 426 NA NA NA NA NA NA NA
## 427 NA NA NA NA NA NA NA
## 429 0 7 459 5 448 5.129e-132 431
## 432 NA NA NA NA NA NA NA
## 433 NA NA NA NA NA NA NA
## 442 0 4 453 1 439 5.578e-158 507
## 443 NA NA NA NA NA NA NA
## 469 0 1 501 1 502 2.685e-233 726
## annotation
## 1 hypothetical protein [Kwi virus]
## 109 <NA>
## 110 <NA>
## 111 <NA>
## 112 <NA>
## 121 <NA>
## 231 hypothetical protein [Kwi virus]
## 232 <NA>
## 375 putative RNA polymerase [Kwi virus]
## 421 hypothetical protein [Kwi virus]
## 426 <NA>
## 427 <NA>
## 429 hypothetical protein [Kwi virus]
## 432 <NA>
## 433 <NA>
## 442 hypothetical protein [Kwi virus]
## 443 <NA>
## 469 hypothetical protein [Kwi virus]
The 3 unannotated contigs show no hits based on blastx on nr. However, they do have a nice ORF (with getorf option 0).
Kwi virus has been described in Obbard et al. 2020 (see fig. 1) from the “dark matter” of Webster et al. 2015. It is composed (for now) of 5 segments of approx. 2 kb each. Probably a new virus family.
Let’s check the blastx result from the original contigs :
# Load the blastx taxonomy table (semicolon-separated, first row is the
# header) and keep only the rows whose query is one of the contigs in
# contig_set. `header = TRUE` is spelled out: the previous `h=T` relied on
# partial argument matching and on `T`, which can be reassigned.
blastx <- read.table("../TABLES/all_taxonomy.blastx", sep = ";", header = TRUE)
blastx_subset <- blastx[blastx$query %in% contig_set, ]
contig_9814 => seg 1 +++; contig_8520 => seg 2; contig_9238 => seg 2; contig_9023 => seg 3; contig_13838 => seg 3; contig_10017 => seg 4; contig_19523 => seg 4; contig_6934 => seg 5 +++
contig_8520 and contig_9238 appear to be homologous. contig_9023 and contig_13838 appear to be homologous. contig_10017 and contig_19523 appear to be homologous.
It looks like two variants are present. Both are present in Lh Ige June 2012, whereas only one is present at the same location in 2011; the other variant is the only one we found in Lh Goth 2012…
It is likely that the three additional contigs are also part of the genome… Need to check co-occurrence on a wider set of samples.
Phylogeny on RdRp (with homologs from nr - blastx) :
# Tree for the Kwi virus RdRp with homologs from nr, drawn with the x axis
# restricted to 0-10.
p <- plot_phylogeny(
  "../phylogenies/Kwi_RdRp_homogs_nr-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 10)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/Kwi_RdRp_homogs_nr.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Single contig grouped under "Hermitage_L.h"; no unassigned contigs (NA).
contig_set <- paste0("contig_", 8242)
contig_set_unassigned <- NA

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Hermitage_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on this contig with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Hermitage_L.h"
)
## orf_name seqid source type start end score strand
## 418 contig_8242_167_1894_+ contig_8242 getorf_JV gene 167 1894 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 418 1 1728 1894 AMO03217.1 0.927 576 42
## gap_opens qstart qend sstart send evalue bitscore
## 418 0 1 576 8 583 0 1079
## annotation
## 418 putative polyprotein, partial [Hermitage virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 418 contig_8242_167_1894_+ contig_8242 getorf_JV gene 167 1894 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 418 1 1728 1894 AMO03217.1 0.927 576 42
## gap_opens qstart qend sstart send evalue bitscore
## 418 0 1 576 8 583 0 1079
## annotation
## 418 putative polyprotein, partial [Hermitage virus]
No phylogeny was built since a single sequence showed homology (Hermitage virus).
Described in Webster et al. 2016: “RNA. Related to Gentian Kobu-sho-associated virus (reported to be dsRNA; ref. 74) and a virus-like transcript from Conwentzia psociformis. Distantly related to soybean cyst nematode virus 5 and the Flavivirus-like Xinzhou spider virus 2. [Two un-joined contigs of 3.2 kbp and 3.5 kbp encoding a putative polyprotein]”
Define the corresponding contigs :
# Single contig grouped under "Phasmaviridae_L.h"; no unassigned contigs (NA).
contig_set <- paste0("contig_", 21211)
contig_set_unassigned <- NA

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Phasmaviridae_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on this contig with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Phasmaviridae_L.h"
)
## orf_name seqid source type start end score strand
## 268 contig_21211_98_649_- contig_21211 getorf_JV gene 98 649 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 268 1 552 1032 YP_009666983.1 0.431 176
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 268 98 0 1 176 137 310 3.072e-39 148
## annotation
## 268 nucleoprotein [Ganda bee virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 268 contig_21211_98_649_- contig_21211 getorf_JV gene 98 649 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 268 1 552 1032 YP_009666983.1 0.431 176
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 268 98 0 1 176 137 310 3.072e-39 148
## annotation
## 268 nucleoprotein [Ganda bee virus]
we built a phylogeny based on the unique protein (nucleoprotein) :
# Tree for the nucleoprotein ORF of contig_21211 and its homologs, drawn with
# the x axis restricted to 0-4.
p <- plot_phylogeny(
  "../phylogenies/contig_21211_98_649_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_21211_98_649.pdf", plot = p, width = 8)
## Saving 8 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Surprisingly, we found Drosophila sequences ! Need to check that !
Orthophasmaviruses are segmented viruses (3 segments: 2.2 kb, 6.7 kb and
2.8 kb).
See files Orthophasmivirus_Drosophila*
HGT in Drosophila?
With nr homologs (ncbi queried on 22 march 2023)
# Tree built with the nr homologs (Ganda_nr_short), drawn with the x axis
# restricted to 0-7.
p <- plot_phylogeny(
  "../phylogenies/Ganda_nr_short-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 7)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/Ganda_nr_short-PhyML_tree.pdf", plot = p, width = 12, height = 11)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Single contig grouped under "Rhabdoviridae2"; no unassigned contigs (NA).
contig_set <- paste0("contig_", 21655)
contig_set_unassigned <- NA

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Rhabdoviridae2 <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on this contig with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Rhabdoviridae2"
)
## orf_name seqid source type start end score strand
## 278 contig_21655_43_606_+ contig_21655 getorf_JV gene 43 606 . TRUE
## 279 contig_21655_652_1020_+ contig_21655 getorf_JV gene 652 1020 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 278 1 564 1021 YP_009337067.1 0.439 188
## 279 1 369 1021 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 278 98 0 1 188 246 420 1.026e-39 150
## 279 NA NA NA NA NA NA NA NA
## annotation
## 278 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 279 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 278 contig_21655_43_606_+ contig_21655 getorf_JV gene 43 606 . TRUE
## 279 contig_21655_652_1020_+ contig_21655 getorf_JV gene 652 1020 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 278 1 564 1021 YP_009337067.1 0.439 188
## 279 1 369 1021 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 278 98 0 1 188 246 420 1.026e-39 150
## 279 NA NA NA NA NA NA NA NA
## annotation
## 278 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 279 <NA>
we built a phylogeny based on the unique protein (nucleoprotein) :
# Tree for the nucleoprotein ORF of contig_21655 and its homologs, drawn with
# the x axis restricted to 0-7.
p <- plot_phylogeny(
  "../phylogenies/contig_21655_43_606_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) +
  xlim(0, 7)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
ggsave(filename = "../phylogenies/contig_21655_43_606_+.pdf", plot = p)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
See Webster et al (2016) Evolutionary Bioinformatics. Detected in D. melanogaster.
RNA genome. Related to Gentian Kobu-sho-associated virus (reported to be dsRNA; ref. 74) and a virus-like transcript from Conwentzia psociformis. Distantly related to soybean cyst nematode virus 5 and the flavivirus-like Xinzhou spider virus 2 (ref. 75). Derived from pools E and K of Webster et al. (ref. 31), this virus incorporates flavivirus-like sequence KP757925 that was previously reported there. [Two un-joined contigs of 2.3 kbp and 3.9 kbp encoding a putative polyprotein]
Define the corresponding contigs :
# Single contig grouped under "Flaviviridae1_L.h"; no unassigned contigs (NA).
contig_set <- paste0("contig_", 11017)
contig_set_unassigned <- NA

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Flaviviridae1_L.h <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on this contig with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Flaviviridae1_L.h"
)
## orf_name seqid source type start end score strand
## 30 contig_11017_3_1493_- contig_11017 getorf_JV gene 3 1493 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 30 1 1491 1557 AMO03219.1 0.316 173 115
## gap_opens qstart qend sstart send evalue bitscore
## 30 0 327 495 27 199 8.94e-16 86
## annotation
## 30 putative polyprotein, partial [Takaungu virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 30 contig_11017_3_1493_- contig_11017 getorf_JV gene 3 1493 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 30 1 1491 1557 AMO03219.1 0.316 173 115
## gap_opens qstart qend sstart send evalue bitscore
## 30 0 327 495 27 199 8.94e-16 86
## annotation
## 30 putative polyprotein, partial [Takaungu virus]
No phylogeny was built since only one homologous sequence was identified.
Define the corresponding contigs :
# Contigs grouped under "Reoviridae5_Tricho", plus the contigs kept as
# unassigned.
contig_set <- paste0(
  "contig_",
  c(12726, 18151, 8245, 7089, 6361, 13248, 6912, 16886, 14483)
)
contig_set_unassigned <- paste0(
  "contig_",
  c(
    21782, 17382, 12974, 11366, 20468, 15565, 10231,
    16123, 6070, 4706, 9848, 11299, 18944
  )
)

# Register both sets under the virus name so that the corresponding lines can
# be fused later.
virus_list$Reoviridae5_Tricho <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)

# Run plot_orfs() on these contigs with the gff_wta2 table; keep the result.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Reoviridae5_Tricho"
)
## orf_name seqid source type start end score strand
## 16 contig_10231_1_1638_+ contig_10231 getorf_JV gene 1 1638 . TRUE
## 17 contig_10231_205_1638_+ contig_10231 getorf_JV gene 205 1638 . TRUE
## 42 contig_11299_2_1444_+ contig_11299 getorf_JV gene 2 1444 . TRUE
## 43 contig_11299_8_1444_+ contig_11299 getorf_JV gene 8 1444 . TRUE
## 44 contig_11366_1_159_- contig_11366 getorf_JV gene 1 159 . FALSE
## 45 contig_11366_172_1461_- contig_11366 getorf_JV gene 172 1461 . FALSE
## 46 contig_11366_172_1485_- contig_11366 getorf_JV gene 172 1485 . FALSE
## 85 contig_12726_9_1265_+ contig_12726 getorf_JV gene 9 1265 . TRUE
## 88 contig_12974_68_1387_- contig_12974 getorf_JV gene 68 1387 . FALSE
## 89 contig_12974_68_1396_- contig_12974 getorf_JV gene 68 1396 . FALSE
## 108 contig_13248_140_1234_+ contig_13248 getorf_JV gene 140 1234 . TRUE
## 131 contig_14483_3_1301_+ contig_14483 getorf_JV gene 3 1301 . TRUE
## 132 contig_14483_39_1301_+ contig_14483 getorf_JV gene 39 1301 . TRUE
## 144 contig_15565_1_1245_+ contig_15565 getorf_JV gene 1 1245 . TRUE
## 145 contig_15565_34_1245_+ contig_15565 getorf_JV gene 34 1245 . TRUE
## 157 contig_16123_3_1100_- contig_16123 getorf_JV gene 3 1100 . FALSE
## 158 contig_16123_3_1124_- contig_16123 getorf_JV gene 3 1124 . FALSE
## 173 contig_16886_14_1174_- contig_16886 getorf_JV gene 14 1174 . FALSE
## 194 contig_17382_160_897_+ contig_17382 getorf_JV gene 160 897 . TRUE
## 195 contig_17382_97_897_+ contig_17382 getorf_JV gene 97 897 . TRUE
## 218 contig_18151_3_1112_- contig_18151 getorf_JV gene 3 1112 . FALSE
## 228 contig_18944_27_1100_+ contig_18944 getorf_JV gene 27 1100 . TRUE
## 229 contig_18944_3_1100_+ contig_18944 getorf_JV gene 3 1100 . TRUE
## 260 contig_20468_49_1020_- contig_20468 getorf_JV gene 49 1020 . FALSE
## 261 contig_20468_49_918_- contig_20468 getorf_JV gene 49 918 . FALSE
## 283 contig_21782_254_1018_- contig_21782 getorf_JV gene 254 1018 . FALSE
## 284 contig_21782_254_991_- contig_21782 getorf_JV gene 254 991 . FALSE
## 335 contig_4706_33_2876_+ contig_4706 getorf_JV gene 33 2876 . TRUE
## 336 contig_4706_66_2876_+ contig_4706 getorf_JV gene 66 2876 . TRUE
## 360 contig_6070_558_2207_- contig_6070 getorf_JV gene 558 2207 . FALSE
## 361 contig_6070_558_2231_- contig_6070 getorf_JV gene 558 2231 . FALSE
## 367 contig_6361_19_2178_+ contig_6361 getorf_JV gene 19 2178 . TRUE
## 373 contig_6912_73_2133_- contig_6912 getorf_JV gene 73 2133 . FALSE
## 378 contig_7089_3_2078_- contig_7089 getorf_JV gene 3 2078 . FALSE
## 419 contig_8245_37_1761_- contig_8245 getorf_JV gene 37 1761 . FALSE
## 470 contig_9848_1_1524_+ contig_9848 getorf_JV gene 1 1524 . TRUE
## 471 contig_9848_25_1524_+ contig_9848 getorf_JV gene 25 1524 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 16 2 1638 1638 <NA> NA NA
## 17 1 1434 1638 <NA> NA NA
## 42 2 1443 1532 <NA> NA NA
## 43 1 1437 1532 <NA> NA NA
## 44 2 159 1526 <NA> NA NA
## 45 1 1290 1526 <NA> NA NA
## 46 2 1314 1526 <NA> NA NA
## 85 1 1257 1266 YP_001111373.1 0.325 384
## 88 1 1320 1398 <NA> NA NA
## 89 2 1329 1398 <NA> NA NA
## 108 1 1095 1235 YP_009508276.1 0.350 347
## 131 2 1299 1302 <NA> NA NA
## 132 1 1263 1302 <NA> NA NA
## 144 2 1245 1245 <NA> NA NA
## 145 1 1212 1245 <NA> NA NA
## 157 1 1098 1217 <NA> NA NA
## 158 2 1122 1217 <NA> NA NA
## 173 1 1161 1183 YP_001111369.1 0.274 217
## 194 1 738 1162 <NA> NA NA
## 195 2 801 1162 <NA> NA NA
## 218 1 1110 1132 YP_002790884.1 0.334 375
## 228 1 1074 1102 <NA> NA NA
## 229 2 1098 1102 <NA> NA NA
## 260 2 972 1055 <NA> NA NA
## 261 1 870 1055 <NA> NA NA
## 283 2 765 1018 <NA> NA NA
## 284 1 738 1018 <NA> NA NA
## 335 2 2844 2876 <NA> NA NA
## 336 1 2811 2876 <NA> NA NA
## 360 1 1650 2359 <NA> NA NA
## 361 2 1674 2359 <NA> NA NA
## 367 1 2160 2277 YP_009389548.1 0.205 709
## 373 1 2061 2144 YP_009389548.1 0.202 659
## 378 1 2076 2101 YP_009508276.1 0.313 608
## 419 1 1725 1894 NP_620544.1 0.316 528
## 470 2 1524 1682 <NA> NA NA
## 471 1 1500 1682 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 16 NA NA NA NA NA NA NA NA
## 17 NA NA NA NA NA NA NA NA
## 42 NA NA NA NA NA NA NA NA
## 43 NA NA NA NA NA NA NA NA
## 44 NA NA NA NA NA NA NA NA
## 45 NA NA NA NA NA NA NA NA
## 46 NA NA NA NA NA NA NA NA
## 85 246 0 55 419 50 433 5.282e-48 184
## 88 NA NA NA NA NA NA NA NA
## 89 NA NA NA NA NA NA NA NA
## 108 224 0 17 363 442 787 1.810e-66 236
## 131 NA NA NA NA NA NA NA NA
## 132 NA NA NA NA NA NA NA NA
## 144 NA NA NA NA NA NA NA NA
## 145 NA NA NA NA NA NA NA NA
## 157 NA NA NA NA NA NA NA NA
## 158 NA NA NA NA NA NA NA NA
## 173 157 0 143 359 916 1132 2.995e-15 83
## 194 NA NA NA NA NA NA NA NA
## 195 NA NA NA NA NA NA NA NA
## 218 240 0 8 368 403 777 1.924e-53 198
## 228 NA NA NA NA NA NA NA NA
## 229 NA NA NA NA NA NA NA NA
## 260 NA NA NA NA NA NA NA NA
## 261 NA NA NA NA NA NA NA NA
## 283 NA NA NA NA NA NA NA NA
## 284 NA NA NA NA NA NA NA NA
## 335 NA NA NA NA NA NA NA NA
## 336 NA NA NA NA NA NA NA NA
## 360 NA NA NA NA NA NA NA NA
## 361 NA NA NA NA NA NA NA NA
## 367 547 0 8 716 309 997 4.210e-25 118
## 373 509 0 1 659 358 996 8.667e-21 104
## 378 414 0 89 692 20 627 1.050e-79 287
## 419 345 0 1 528 869 1373 1.444e-64 238
## 470 NA NA NA NA NA NA NA NA
## 471 NA NA NA NA NA NA NA NA
## annotation
## 16 <NA>
## 17 <NA>
## 42 <NA>
## 43 <NA>
## 44 <NA>
## 45 <NA>
## 46 <NA>
## 85 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 88 <NA>
## 89 <NA>
## 108 polypeptide P5 [Wound tumor virus]
## 131 <NA>
## 132 <NA>
## 144 <NA>
## 145 <NA>
## 157 <NA>
## 158 <NA>
## 173 RGDV P2 [Rice gall dwarf virus]
## 194 <NA>
## 195 <NA>
## 218 RNA-directed RNA polymerase [Homalodisca vitripennis reovirus]
## 228 <NA>
## 229 <NA>
## 260 <NA>
## 261 <NA>
## 283 <NA>
## 284 <NA>
## 335 <NA>
## 336 <NA>
## 360 <NA>
## 361 <NA>
## 367 putative major core protein [Aedes camptorhynchus reo-like virus]
## 373 putative major core protein [Aedes camptorhynchus reo-like virus]
## 378 polypeptide P5 [Wound tumor virus]
## 419 RNA-dependent RNA polymerase [Rice dwarf virus]
## 470 <NA>
## 471 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 16 contig_10231_1_1638_+ contig_10231 getorf_JV gene 1 1638 . TRUE
## 17 contig_10231_205_1638_+ contig_10231 getorf_JV gene 205 1638 . TRUE
## 42 contig_11299_2_1444_+ contig_11299 getorf_JV gene 2 1444 . TRUE
## 43 contig_11299_8_1444_+ contig_11299 getorf_JV gene 8 1444 . TRUE
## 44 contig_11366_1_159_- contig_11366 getorf_JV gene 1 159 . FALSE
## 45 contig_11366_172_1461_- contig_11366 getorf_JV gene 172 1461 . FALSE
## 46 contig_11366_172_1485_- contig_11366 getorf_JV gene 172 1485 . FALSE
## 85 contig_12726_9_1265_+ contig_12726 getorf_JV gene 9 1265 . TRUE
## 88 contig_12974_68_1387_- contig_12974 getorf_JV gene 68 1387 . FALSE
## 89 contig_12974_68_1396_- contig_12974 getorf_JV gene 68 1396 . FALSE
## 108 contig_13248_140_1234_+ contig_13248 getorf_JV gene 140 1234 . TRUE
## 131 contig_14483_3_1301_+ contig_14483 getorf_JV gene 3 1301 . TRUE
## 132 contig_14483_39_1301_+ contig_14483 getorf_JV gene 39 1301 . TRUE
## 144 contig_15565_1_1245_+ contig_15565 getorf_JV gene 1 1245 . TRUE
## 145 contig_15565_34_1245_+ contig_15565 getorf_JV gene 34 1245 . TRUE
## 157 contig_16123_3_1100_- contig_16123 getorf_JV gene 3 1100 . FALSE
## 158 contig_16123_3_1124_- contig_16123 getorf_JV gene 3 1124 . FALSE
## 173 contig_16886_14_1174_- contig_16886 getorf_JV gene 14 1174 . FALSE
## 194 contig_17382_160_897_+ contig_17382 getorf_JV gene 160 897 . TRUE
## 195 contig_17382_97_897_+ contig_17382 getorf_JV gene 97 897 . TRUE
## 218 contig_18151_3_1112_- contig_18151 getorf_JV gene 3 1112 . FALSE
## 228 contig_18944_27_1100_+ contig_18944 getorf_JV gene 27 1100 . TRUE
## 229 contig_18944_3_1100_+ contig_18944 getorf_JV gene 3 1100 . TRUE
## 260 contig_20468_49_1020_- contig_20468 getorf_JV gene 49 1020 . FALSE
## 261 contig_20468_49_918_- contig_20468 getorf_JV gene 49 918 . FALSE
## 283 contig_21782_254_1018_- contig_21782 getorf_JV gene 254 1018 . FALSE
## 284 contig_21782_254_991_- contig_21782 getorf_JV gene 254 991 . FALSE
## 335 contig_4706_33_2876_+ contig_4706 getorf_JV gene 33 2876 . TRUE
## 336 contig_4706_66_2876_+ contig_4706 getorf_JV gene 66 2876 . TRUE
## 360 contig_6070_558_2207_- contig_6070 getorf_JV gene 558 2207 . FALSE
## 361 contig_6070_558_2231_- contig_6070 getorf_JV gene 558 2231 . FALSE
## 367 contig_6361_19_2178_+ contig_6361 getorf_JV gene 19 2178 . TRUE
## 373 contig_6912_73_2133_- contig_6912 getorf_JV gene 73 2133 . FALSE
## 378 contig_7089_3_2078_- contig_7089 getorf_JV gene 3 2078 . FALSE
## 419 contig_8245_37_1761_- contig_8245 getorf_JV gene 37 1761 . FALSE
## 470 contig_9848_1_1524_+ contig_9848 getorf_JV gene 1 1524 . TRUE
## 471 contig_9848_25_1524_+ contig_9848 getorf_JV gene 25 1524 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 16 2 1638 1638 <NA> NA NA
## 17 1 1434 1638 <NA> NA NA
## 42 2 1443 1532 <NA> NA NA
## 43 1 1437 1532 <NA> NA NA
## 44 2 159 1526 <NA> NA NA
## 45 1 1290 1526 <NA> NA NA
## 46 2 1314 1526 <NA> NA NA
## 85 1 1257 1266 YP_001111373.1 0.325 384
## 88 1 1320 1398 <NA> NA NA
## 89 2 1329 1398 <NA> NA NA
## 108 1 1095 1235 YP_009508276.1 0.350 347
## 131 2 1299 1302 <NA> NA NA
## 132 1 1263 1302 <NA> NA NA
## 144 2 1245 1245 <NA> NA NA
## 145 1 1212 1245 <NA> NA NA
## 157 1 1098 1217 <NA> NA NA
## 158 2 1122 1217 <NA> NA NA
## 173 1 1161 1183 YP_001111369.1 0.274 217
## 194 1 738 1162 <NA> NA NA
## 195 2 801 1162 <NA> NA NA
## 218 1 1110 1132 YP_002790884.1 0.334 375
## 228 1 1074 1102 <NA> NA NA
## 229 2 1098 1102 <NA> NA NA
## 260 2 972 1055 <NA> NA NA
## 261 1 870 1055 <NA> NA NA
## 283 2 765 1018 <NA> NA NA
## 284 1 738 1018 <NA> NA NA
## 335 2 2844 2876 <NA> NA NA
## 336 1 2811 2876 <NA> NA NA
## 360 1 1650 2359 <NA> NA NA
## 361 2 1674 2359 <NA> NA NA
## 367 1 2160 2277 YP_009389548.1 0.205 709
## 373 1 2061 2144 YP_009389548.1 0.202 659
## 378 1 2076 2101 YP_009508276.1 0.313 608
## 419 1 1725 1894 NP_620544.1 0.316 528
## 470 2 1524 1682 <NA> NA NA
## 471 1 1500 1682 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 16 NA NA NA NA NA NA NA NA
## 17 NA NA NA NA NA NA NA NA
## 42 NA NA NA NA NA NA NA NA
## 43 NA NA NA NA NA NA NA NA
## 44 NA NA NA NA NA NA NA NA
## 45 NA NA NA NA NA NA NA NA
## 46 NA NA NA NA NA NA NA NA
## 85 246 0 55 419 50 433 5.282e-48 184
## 88 NA NA NA NA NA NA NA NA
## 89 NA NA NA NA NA NA NA NA
## 108 224 0 17 363 442 787 1.810e-66 236
## 131 NA NA NA NA NA NA NA NA
## 132 NA NA NA NA NA NA NA NA
## 144 NA NA NA NA NA NA NA NA
## 145 NA NA NA NA NA NA NA NA
## 157 NA NA NA NA NA NA NA NA
## 158 NA NA NA NA NA NA NA NA
## 173 157 0 143 359 916 1132 2.995e-15 83
## 194 NA NA NA NA NA NA NA NA
## 195 NA NA NA NA NA NA NA NA
## 218 240 0 8 368 403 777 1.924e-53 198
## 228 NA NA NA NA NA NA NA NA
## 229 NA NA NA NA NA NA NA NA
## 260 NA NA NA NA NA NA NA NA
## 261 NA NA NA NA NA NA NA NA
## 283 NA NA NA NA NA NA NA NA
## 284 NA NA NA NA NA NA NA NA
## 335 NA NA NA NA NA NA NA NA
## 336 NA NA NA NA NA NA NA NA
## 360 NA NA NA NA NA NA NA NA
## 361 NA NA NA NA NA NA NA NA
## 367 547 0 8 716 309 997 4.210e-25 118
## 373 509 0 1 659 358 996 8.667e-21 104
## 378 414 0 89 692 20 627 1.050e-79 287
## 419 345 0 1 528 869 1373 1.444e-64 238
## 470 NA NA NA NA NA NA NA NA
## 471 NA NA NA NA NA NA NA NA
## annotation
## 16 <NA>
## 17 <NA>
## 42 <NA>
## 43 <NA>
## 44 <NA>
## 45 <NA>
## 46 <NA>
## 85 putative RNA dependent RNA polymerase [Rice gall dwarf virus]
## 88 <NA>
## 89 <NA>
## 108 polypeptide P5 [Wound tumor virus]
## 131 <NA>
## 132 <NA>
## 144 <NA>
## 145 <NA>
## 157 <NA>
## 158 <NA>
## 173 RGDV P2 [Rice gall dwarf virus]
## 194 <NA>
## 195 <NA>
## 218 RNA-directed RNA polymerase [Homalodisca vitripennis reovirus]
## 228 <NA>
## 229 <NA>
## 260 <NA>
## 261 <NA>
## 283 <NA>
## 284 <NA>
## 335 <NA>
## 336 <NA>
## 360 <NA>
## 361 <NA>
## 367 putative major core protein [Aedes camptorhynchus reo-like virus]
## 373 putative major core protein [Aedes camptorhynchus reo-like virus]
## 378 polypeptide P5 [Wound tumor virus]
## 419 RNA-dependent RNA polymerase [Rice dwarf virus]
## 470 <NA>
## 471 <NA>
The unannotated contigs show no blastx hits against nr. However, each of them contains a nice ORF (with getorf option 0), so we include them:
Rice gall dwarf virus (RGDV), a member of the family Reoviridae, causes repeated epidemics in rice fields in southern China. An RGDV isolate collected from Guangdong Province (southern China) is mainly transmitted by leafhopper vector Recilia dorsalis in a persistent-propagative manner. The infection by RGDV induces the formation of virus-containing tubules in the plant host and insect vector. RGDV is an icosahedral double-layer particle approximately 65–70 nm in diameter, with a 12-segmented dsRNA genome (Moriyasu et al., 2000, 2007; Miyazaki et al., 2005; Zhang et al., 2008). https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3719018/
Same structure for Homalodisca vitripennis reovirus https://www.genome.jp/virushostdb/411854
We got three contigs that span the major part of the RdRp :
YP_001111373.1 is the RdRp from RGDV (1458 aa long). contig_12726 covers amino acids 50 to 433 of YP_001111373.1; contig_18151 covers amino acids 407 to 781 of YP_001111373.1; and contig_8245 covers amino acids 870 to 1386 of YP_001111373.1.
Five other contigs encode other proteins.
We merge these three parts of the RdRp and build a phylogeny :
# Draw the tree stored in the "_fused" file and set the x-axis limits to 0-3.
p <- plot_phylogeny(
  "../phylogenies/contig_8245_37_1761_-_fused.tree",
  taxo_info = wta_taxo_info
)
p <- p + xlim(0, 3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the figure to PDF (no width/height given), then display it.
ggsave(
  filename = "../phylogenies/contig_8245_37_1761_-_fused.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs assigned to this virus; no unassigned candidates (NA).
contig_set <- paste0("contig_", 923)
contig_set_unassigned <- NA
# Keep both sets in virus_list for the later fusion of the corresponding lines.
virus_list$Virga_Tricho <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs from the gff_wta2 annotation table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Virga_Tricho"
)
## orf_name seqid source type start end score strand
## 438 contig_923_1133_1591_- contig_923 getorf_JV gene 1133 1591 . FALSE
## 439 contig_923_1627_10098_- contig_923 getorf_JV gene 1627 10098 . FALSE
## 440 contig_923_282_659_- contig_923 getorf_JV gene 282 659 . FALSE
## 441 contig_923_694_1101_- contig_923 getorf_JV gene 694 1101 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 438 1 459 10210 <NA> NA NA
## 439 1 8472 10210 YP_009337659.1 0.261 2633
## 440 1 378 10210 <NA> NA NA
## 441 1 408 10210 YP_009388489.1 0.310 115
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 438 NA NA NA NA NA NA NA NA
## 439 1670 0 188 2820 179 2439 5.156e-176 619
## 440 NA NA NA NA NA NA NA NA
## 441 75 0 7 116 17 131 6.048e-04 44
## annotation
## 438 <NA>
## 439 hypothetical protein [Hubei virga-like virus 21]
## 440 <NA>
## 441 ORF4 [Lake Sinai Virus SA1]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 438 contig_923_1133_1591_- contig_923 getorf_JV gene 1133 1591 . FALSE
## 439 contig_923_1627_10098_- contig_923 getorf_JV gene 1627 10098 . FALSE
## 440 contig_923_282_659_- contig_923 getorf_JV gene 282 659 . FALSE
## 441 contig_923_694_1101_- contig_923 getorf_JV gene 694 1101 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 438 1 459 10210 <NA> NA NA
## 439 1 8472 10210 YP_009337659.1 0.261 2633
## 440 1 378 10210 <NA> NA NA
## 441 1 408 10210 YP_009388489.1 0.310 115
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 438 NA NA NA NA NA NA NA NA
## 439 1670 0 188 2820 179 2439 5.156e-176 619
## 440 NA NA NA NA NA NA NA NA
## 441 75 0 7 116 17 131 6.048e-04 44
## annotation
## 438 <NA>
## 439 hypothetical protein [Hubei virga-like virus 21]
## 440 <NA>
## 441 ORF4 [Lake Sinai Virus SA1]
See Kondo et al. 2019.
Virga-like viruses are non-segmented +ssRNA viruses with genomes of up to 10 kb. It looks like our assembly is complete.
contig_923_1627_10098 shows the expected domains (see fig. 2 of Kondo et al. 2019):
domains found in contig_923, third ORF
We built a phylogeny based on RdRP only :
# Draw the PhyML tree of contig_923_1627_10098_- with its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_923_1627_10098_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Disabled x-axis adjustment, kept for reference:
# p <- p + xlim(0, 15)
# Save as an 8 x 8 PDF, then display the plot.
ggsave(
  filename = "../phylogenies/contig_923_1627_10098_-.pdf",
  plot = p,
  width = 8,
  height = 8
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
It clusters with viruses associated with several Drosophila species: Beult virus is a positive-strand RNA virus described from D. suzukii by Medd et al. 2018; Bofa virus and Buckhurst virus are viruses of D. melanogaster and D. obscura, respectively (Webster et al. 2016).
Define the corresponding contigs :
# Contigs assigned to this virus, plus two candidates without a BLAST hit.
contig_set <- paste0("contig_", c(22765, 13828))
contig_set_unassigned <- paste0("contig_", c(10503, 10178))
# Keep both sets in virus_list for the later fusion of the corresponding lines.
virus_list$Chuviridae4_Tricho <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# NOTE(review): the plot name below ("Chuvirus2_tricho") differs from the
# virus_list key above ("Chuviridae4_Tricho") - confirm which one is intended.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Chuvirus2_tricho"
)
## orf_name seqid source type start end score strand
## 14 contig_10178_129_1604_- contig_10178 getorf_JV gene 129 1604 . FALSE
## 15 contig_10178_129_1631_- contig_10178 getorf_JV gene 129 1631 . FALSE
## 21 contig_10503_2_1444_+ contig_10503 getorf_JV gene 2 1444 . TRUE
## 22 contig_10503_8_1444_+ contig_10503 getorf_JV gene 8 1444 . TRUE
## 120 contig_13828_62_1273_- contig_13828 getorf_JV gene 62 1273 . FALSE
## 298 contig_22765_27_590_+ contig_22765 getorf_JV gene 27 590 . TRUE
## 299 contig_22765_642_5144_+ contig_22765 getorf_JV gene 642 5144 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 14 1 1476 1643 <NA> NA NA
## 15 2 1503 1643 <NA> NA NA
## 21 2 1443 1610 <NA> NA NA
## 22 1 1437 1610 <NA> NA NA
## 120 1 1212 1340 YP_009337904.1 0.284 369
## 298 1 564 5144 <NA> NA NA
## 299 1 4503 5144 YP_009337089.1 0.407 1308
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 14 NA NA NA NA NA NA NA NA
## 15 NA NA NA NA NA NA NA NA
## 21 NA NA NA NA NA NA NA NA
## 22 NA NA NA NA NA NA NA NA
## 120 263 0 1 369 1791 2158 9.707e-41 162
## 298 NA NA NA NA NA NA NA NA
## 299 773 0 8 1312 210 1517 0.000e+00 1041
## annotation
## 14 <NA>
## 15 <NA>
## 21 <NA>
## 22 <NA>
## 120 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]
## 298 <NA>
## 299 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]
## Saving 7 x 5 in image
res[1]
## [[1]]
Contigs 10503 and 10178 have no hit but contain nice ORFs and are thus included.
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 14 contig_10178_129_1604_- contig_10178 getorf_JV gene 129 1604 . FALSE
## 15 contig_10178_129_1631_- contig_10178 getorf_JV gene 129 1631 . FALSE
## 21 contig_10503_2_1444_+ contig_10503 getorf_JV gene 2 1444 . TRUE
## 22 contig_10503_8_1444_+ contig_10503 getorf_JV gene 8 1444 . TRUE
## 120 contig_13828_62_1273_- contig_13828 getorf_JV gene 62 1273 . FALSE
## 298 contig_22765_27_590_+ contig_22765 getorf_JV gene 27 590 . TRUE
## 299 contig_22765_642_5144_+ contig_22765 getorf_JV gene 642 5144 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 14 1 1476 1643 <NA> NA NA
## 15 2 1503 1643 <NA> NA NA
## 21 2 1443 1610 <NA> NA NA
## 22 1 1437 1610 <NA> NA NA
## 120 1 1212 1340 YP_009337904.1 0.284 369
## 298 1 564 5144 <NA> NA NA
## 299 1 4503 5144 YP_009337089.1 0.407 1308
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 14 NA NA NA NA NA NA NA NA
## 15 NA NA NA NA NA NA NA NA
## 21 NA NA NA NA NA NA NA NA
## 22 NA NA NA NA NA NA NA NA
## 120 263 0 1 369 1791 2158 9.707e-41 162
## 298 NA NA NA NA NA NA NA NA
## 299 773 0 8 1312 210 1517 0.000e+00 1041
## annotation
## 14 <NA>
## 15 <NA>
## 21 <NA>
## 22 <NA>
## 120 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]
## 298 <NA>
## 299 RNA-dependent RNA polymerase [Hubei chuvirus-like virus 3]
The major part of the RdRp is covered, but is split between two contigs :
contig_22765 covers protein YP_009337904.1 (RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1], 2172 aa long) from position 201 to 1698; contig_13828_62_1273_- covers protein YP_009337904.1 (RNA-dependent RNA polymerase [Hubei chuvirus-like virus 1]) from position 1791 to 2158.
Hubei chuvirus-like virus 1 is composed of two segments of 6873 bp and 3958 bp.
pfam00946, Mononegavirales RNA dependent RNA polymerase: Members of the Mononegavirales including the Paramyxoviridae, like other non-segmented negative strand RNA viruses, have an RNA-dependent RNA polymerase composed of two subunits, a large protein L and a phosphoprotein P. This is a protein family of the L protein. The L protein confers the RNA polymerase activity on the complex. The P protein acts as a transcription factor.
Is the P protein lacking in our dataset? If so, the genome is only partial.
We built a phylogeny based on RdRP domain only :
# Draw the BioNJ tree of contig_22765_642_5144_+ with its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_22765_642_5144_+_with_homologs-BioNJ_tree",
  taxo_info = wta_taxo_info
)
# Adjust the x axis.
p <- p + xlim(0, 1.5)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the figure to PDF (no width/height given), then display it.
ggsave(
  filename = "../phylogenies/contig_22765_642_5144_+.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs assigned to this virus; no unassigned candidates (NA).
contig_set <- paste0("contig_", c(8703, 6917, 9411))
contig_set_unassigned <- NA
# Keep both sets in virus_list for the later fusion of the corresponding lines.
virus_list$`Partiti-like4_Tricho` <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs from the gff_wta2 annotation table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Partiti-like4_Tricho"
)
## orf_name seqid source type start end score strand
## 374 contig_6917_680_2107_- contig_6917 getorf_JV gene 680 2107 . FALSE
## 422 contig_8703_47_1768_+ contig_8703 getorf_JV gene 47 1768 . TRUE
## 454 contig_9411_62_1708_- contig_9411 getorf_JV gene 62 1708 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 374 1 1428 2143 YP_009346040.1 0.380 350
## 422 1 1722 1827 YP_009346039.1 0.488 538
## 454 1 1647 1737 YP_009346040.1 0.390 398
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 374 216 0 99 448 124 473 1.201e-60 223
## 422 274 0 18 554 23 560 1.014e-167 540
## 454 237 0 136 533 112 501 7.746e-79 280
## annotation
## 374 hypothetical protein [Wuhan insect virus 22]
## 422 RdRp [Wuhan insect virus 22]
## 454 hypothetical protein [Wuhan insect virus 22]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 374 contig_6917_680_2107_- contig_6917 getorf_JV gene 680 2107 . FALSE
## 422 contig_8703_47_1768_+ contig_8703 getorf_JV gene 47 1768 . TRUE
## 454 contig_9411_62_1708_- contig_9411 getorf_JV gene 62 1708 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 374 1 1428 2143 YP_009346040.1 0.380 350
## 422 1 1722 1827 YP_009346039.1 0.488 538
## 454 1 1647 1737 YP_009346040.1 0.390 398
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 374 216 0 99 448 124 473 1.201e-60 223
## 422 274 0 18 554 23 560 1.014e-167 540
## 454 237 0 136 533 112 501 7.746e-79 280
## annotation
## 374 hypothetical protein [Wuhan insect virus 22]
## 422 RdRp [Wuhan insect virus 22]
## 454 hypothetical protein [Wuhan insect virus 22]
Wuhan insect virus 22 is composed of two segments: a 1869 bp segment encoding the RdRp and a 1766 bp segment encoding a hypothetical protein. It is unclear whether the three contigs belong to the same virus or whether only one of the two “hypothetical protein”-encoding contigs does.
We built a phylogeny based on RdRP :
# Draw the PhyML tree of contig_8703_47_1768_+ with its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_8703_47_1768_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Adjust the x axis.
p <- p + xlim(0, 4)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the figure to PDF (no width/height given), then display it.
ggsave(
  filename = "../phylogenies/contig_8703_47_1768_+.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
protein contig_6917_680_2107 contig_9411_62_1708_- has lots of eukaryotic hits… Check that! contig_6917_680_2107_nr.fa: same thing…
contig 9411, HGT ?
Define the corresponding contigs :
# Contigs assigned to this virus; no unassigned candidates (NA).
contig_set <- paste0("contig_", c(5571, 3788, 11939, 20619, 10949))
contig_set_unassigned <- NA
# Keep both sets in virus_list for the later fusion of the corresponding lines.
virus_list$Rhabdoviridae1_Tricho <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs from the gff_wta2 annotation table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Rhabdoviridae1_Tricho"
)
## orf_name seqid source type start end score strand
## 28 contig_10949_44_1561_+ contig_10949 getorf_JV gene 44 1561 . TRUE
## 65 contig_11939_3_1463_- contig_11939 getorf_JV gene 3 1463 . FALSE
## 262 contig_20619_1_1005_- contig_20619 getorf_JV gene 1 1005 . FALSE
## 332 contig_3788_54_3266_+ contig_3788 getorf_JV gene 54 3266 . TRUE
## 350 contig_5571_1198_1908_+ contig_5571 getorf_JV gene 1198 1908 . TRUE
## 351 contig_5571_2006_2527_+ contig_5571 getorf_JV gene 2006 2527 . TRUE
## 352 contig_5571_283_1155_+ contig_5571 getorf_JV gene 283 1155 . TRUE
## 353 contig_5571_91_246_+ contig_5571 getorf_JV gene 91 246 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 28 1 1518 1563 YP_009337071.1 0.803 505
## 65 1 1461 1478 YP_009337071.1 0.577 487
## 262 1 1005 1051 YP_009337067.1 0.556 329
## 332 1 3213 3390 YP_009301743.1 0.525 1050
## 350 1 711 2527 YP_009337069.1 0.486 220
## 351 1 522 2527 YP_009337070.1 0.443 166
## 352 1 873 2527 YP_009337068.1 0.256 288
## 353 1 156 2527 YP_009337067.1 0.529 51
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 28 99 0 2 506 531 1035 6.814e-278 855
## 65 206 0 1 487 11 497 6.503e-195 614
## 262 146 0 1 329 1 329 3.026e-119 387
## 332 498 0 1 1050 1049 2098 0.000e+00 1120
## 350 109 0 17 236 11 223 2.022e-63 221
## 351 91 0 4 168 7 172 1.349e-38 146
## 352 202 0 1 288 1 272 2.934e-08 60
## 353 24 0 2 52 370 420 7.671e-12 62
## annotation
## 28 RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 65 RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 262 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 332 RNA-dependent RNA polymerase [Wuhan Insect virus 7]
## 350 hypothetical protein 3 [Hubei dimarhabdovirus virus 2]
## 351 putative glycoprotein [Hubei dimarhabdovirus virus 2]
## 352 hypothetical protein 2 [Hubei dimarhabdovirus virus 2]
## 353 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 28 contig_10949_44_1561_+ contig_10949 getorf_JV gene 44 1561 . TRUE
## 65 contig_11939_3_1463_- contig_11939 getorf_JV gene 3 1463 . FALSE
## 262 contig_20619_1_1005_- contig_20619 getorf_JV gene 1 1005 . FALSE
## 332 contig_3788_54_3266_+ contig_3788 getorf_JV gene 54 3266 . TRUE
## 350 contig_5571_1198_1908_+ contig_5571 getorf_JV gene 1198 1908 . TRUE
## 351 contig_5571_2006_2527_+ contig_5571 getorf_JV gene 2006 2527 . TRUE
## 352 contig_5571_283_1155_+ contig_5571 getorf_JV gene 283 1155 . TRUE
## 353 contig_5571_91_246_+ contig_5571 getorf_JV gene 91 246 . TRUE
## phase attributes seq_length subject_id identity alignment_length
## 28 1 1518 1563 YP_009337071.1 0.803 505
## 65 1 1461 1478 YP_009337071.1 0.577 487
## 262 1 1005 1051 YP_009337067.1 0.556 329
## 332 1 3213 3390 YP_009301743.1 0.525 1050
## 350 1 711 2527 YP_009337069.1 0.486 220
## 351 1 522 2527 YP_009337070.1 0.443 166
## 352 1 873 2527 YP_009337068.1 0.256 288
## 353 1 156 2527 YP_009337067.1 0.529 51
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 28 99 0 2 506 531 1035 6.814e-278 855
## 65 206 0 1 487 11 497 6.503e-195 614
## 262 146 0 1 329 1 329 3.026e-119 387
## 332 498 0 1 1050 1049 2098 0.000e+00 1120
## 350 109 0 17 236 11 223 2.022e-63 221
## 351 91 0 4 168 7 172 1.349e-38 146
## 352 202 0 1 288 1 272 2.934e-08 60
## 353 24 0 2 52 370 420 7.671e-12 62
## annotation
## 28 RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 65 RNA-dependent RNA polymerase [Hubei dimarhabdovirus virus 2]
## 262 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
## 332 RNA-dependent RNA polymerase [Wuhan Insect virus 7]
## 350 hypothetical protein 3 [Hubei dimarhabdovirus virus 2]
## 351 putative glycoprotein [Hubei dimarhabdovirus virus 2]
## 352 hypothetical protein 2 [Hubei dimarhabdovirus virus 2]
## 353 putative nucleoprotein [Hubei dimarhabdovirus virus 2]
Hubei dimarhabdovirus 2 is a non-segmented virus of 11332 bp.
https://www.genome.jp/dbget-bin/www_bget?refseq:NC_033006
Our assembly is thus fragmented. However it seems to cover at least most of the RdRp:
YP_009337071.1 is the RdRp from Hubei dimarhabdovirus 2. It is 2119 aa long. Contig 11939 encodes a protein that aligns with YP_009337071.1 from 11 to 497 (contains an RdRp domain). Contig 10949 encodes a protein that aligns with YP_009337071.1 from 531 to 1035 (contains an RdRp domain). contig_3788_54_3266_+ encodes a protein that aligns with YP_009337071.1 from 1048 to 2119 (contains an mRNA capping region and a viral-capping methyltransferase).
We built a phylogeny based on RdRP domain (orfs encoded by contigs 11939 and 10949 only):
# Draw the PhyML tree stored in the contig_3788_54_3266_+ "FUSED" file.
p <- plot_phylogeny(
  file = "../phylogenies/contig_3788_54_3266_+_FUSED-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Adjust the x axis.
p <- p + xlim(0, 2)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# NOTE(review): the PDF name ends in "_" (strand sign dropped?) - confirm
# that this file name is the intended one.
ggsave(
  filename = "../phylogenies/contig_3788_54_3266_.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Chaq virus was first described in Webster et al. Plos Biology (2015) from small RNA sequencing. They write: “Neither Chaq Virus nor Galbut_virus_D.mel Virus has high sequence similarity to known viruses, but both also cluster with invertebrate transcriptome-derived sequences. These may represent new virus lineages, or be weakly conserved genes in a known virus group”.
For the phylogeny, we included the sequences obtained from D. simulans (contig_7817). We also found sequences related to Chaq virus in D. subobscura/D. obscura (contig_15880).
The alignments revealed two parts : a part common to all proteins in the N terminal part, and a C terminal part specific to some sequences. After alignment, only the common N terminal part was used for phylogenetic reconstruction.
# Draw the PhyML tree of contig_13219_258_1214_+ with its homologs.
p <- plot_phylogeny(
  "../phylogenies/contig_13219_258_1214_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Restrict the x axis to 0-1.
p <- p + xlim(0, 1)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Write the figure to PDF (no width/height given), then display it.
ggsave(
  filename = "../phylogenies/contig_13219_258_1214.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Chaq virus was always associated with Galbut_virus_D.mel in D. melanogaster and D. simulans, as observed by Shi et al. 2018 (Proc B), whereas it was associated with Vera-Hubei diptera virus 17 in D. subobscura/D. obscura.
Wuhan arthropod virus 2 is related to Black queen cell virus, which suggests that this set of contigs may belong to the same genome (Shi et al. 2016 Nature). See fig S15.
Define the corresponding contigs :
# Contigs assigned to this virus; no unassigned candidates (NA).
contig_set <- c("contig_7819", "contig_9727", "contig_22845")
contig_set_unassigned <- NA
# Keep both sets in virus_list for the later fusion of the corresponding lines.
virus_list$Dicistroviridae_Pachy <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# Plot the ORFs of these contigs from the gff_wta2 annotation table.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Dicistroviridae_Pachy"
)
## orf_name seqid source type start end score strand
## 304 contig_22845_207_4232_+ contig_22845 getorf_JV gene 207 4232 . TRUE
## 396 contig_7819_1403_1555_+ contig_7819 getorf_JV gene 1403 1555 . TRUE
## 397 contig_7819_2_1354_- contig_7819 getorf_JV gene 2 1354 . FALSE
## 468 contig_9727_2_1339_- contig_9727 getorf_JV gene 2 1339 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 304 1 4026 4457 NP_620565.1 0.398 834
## 396 1 153 1961 <NA> NA NA
## 397 1 1353 1961 YP_009342286.1 0.217 335
## 468 1 1338 1696 NP_620564.1 0.411 459
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 304 471 0 469 1302 20 802 3.251e-157 539
## 396 NA NA NA NA NA NA NA NA
## 397 248 0 113 447 135 452 6.716e-05 51
## 468 257 0 3 440 872 1330 9.803e-103 345
## annotation
## 304 structural polyprotein [Black queen cell virus]
## 396 <NA>
## 397 hypothetical protein 1 [Wuhan arthropod virus 2]
## 468 nonstructural polyprotein [Black queen cell virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 304 contig_22845_207_4232_+ contig_22845 getorf_JV gene 207 4232 . TRUE
## 396 contig_7819_1403_1555_+ contig_7819 getorf_JV gene 1403 1555 . TRUE
## 397 contig_7819_2_1354_- contig_7819 getorf_JV gene 2 1354 . FALSE
## 468 contig_9727_2_1339_- contig_9727 getorf_JV gene 2 1339 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 304 1 4026 4457 NP_620565.1 0.398 834
## 396 1 153 1961 <NA> NA NA
## 397 1 1353 1961 YP_009342286.1 0.217 335
## 468 1 1338 1696 NP_620564.1 0.411 459
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 304 471 0 469 1302 20 802 3.251e-157 539
## 396 NA NA NA NA NA NA NA NA
## 397 248 0 113 447 135 452 6.716e-05 51
## 468 257 0 3 440 872 1330 9.803e-103 345
## annotation
## 304 structural polyprotein [Black queen cell virus]
## 396 <NA>
## 397 hypothetical protein 1 [Wuhan arthropod virus 2]
## 468 nonstructural polyprotein [Black queen cell virus]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 304 contig_22845_207_4232_+ contig_22845 getorf_JV gene 207 4232 . TRUE
## 396 contig_7819_1403_1555_+ contig_7819 getorf_JV gene 1403 1555 . TRUE
## 397 contig_7819_2_1354_- contig_7819 getorf_JV gene 2 1354 . FALSE
## 468 contig_9727_2_1339_- contig_9727 getorf_JV gene 2 1339 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 304 1 4026 4457 NP_620565.1 0.398 834
## 396 1 153 1961 <NA> NA NA
## 397 1 1353 1961 YP_009342286.1 0.217 335
## 468 1 1338 1696 NP_620564.1 0.411 459
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 304 471 0 469 1302 20 802 3.251e-157 539
## 396 NA NA NA NA NA NA NA NA
## 397 248 0 113 447 135 452 6.716e-05 51
## 468 257 0 3 440 872 1330 9.803e-103 345
## annotation
## 304 structural polyprotein [Black queen cell virus]
## 396 <NA>
## 397 hypothetical protein 1 [Wuhan arthropod virus 2]
## 468 nonstructural polyprotein [Black queen cell virus]
domains in contig_22845
domains in contig_9727
No conserved domains were detected in contig_7819.
We built a phylogeny based on RdRp domain only of contig22845:
# Draw the PhyML tree of contig_22845_207_4232_+ with its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_22845_207_4232_+_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Set the x-axis limits to 0-30.
p <- p + xlim(0, 30)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Save as a 12 x 12 PDF, then display the plot.
ggsave(
  filename = "../phylogenies/contig_22845.pdf",
  plot = p,
  width = 12,
  height = 12
)
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
We built a phylogeny based on RdRp domain only of contig9727:
# Draw the PhyML tree of contig_9727_2_1339_- with its homologs.
p <- plot_phylogeny(
  file = "../phylogenies/contig_9727_2_1339_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
)
# Disabled x-axis adjustment, kept for reference:
# p <- p + xlim(0, 30)
# Write the figure to PDF (no width/height given), then display it.
ggsave(
  filename = "../phylogenies/contig_9727.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
They are segmented viruses (4 segments for Hubei_tetragnatha_maxillosa_virus_8 and 6 segments for Wuhan_cricket_virus_2; see fig. S31). Both viruses belong to the same clade (see fig. S11 in Shi et al. 2016). It is unclear whether contig_15227 is part of this genome or of the Black queen cell virus-like genome.
Define the corresponding contigs :
# Contigs assigned to this virus, plus one contig whose membership is uncertain.
contig_set <- c("contig_14174", "contig_13124")
contig_set_unassigned <- "contig_15227"
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Partiti-like3_Pachy"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# NOTE(review): the plot name differs from the virus_list key above - confirm
# this is intended.
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Hubei_tetragnatha_virus_Pachy"
)
## orf_name seqid source type start end score strand
## 95 contig_13124_62_1330_- contig_13124 getorf_JV gene 62 1330 . FALSE
## 127 contig_14174_3_1223_- contig_14174 getorf_JV gene 3 1223 . FALSE
## 142 contig_15227_3_1181_- contig_15227 getorf_JV gene 3 1181 . FALSE
## 143 contig_15227_3_1196_- contig_15227 getorf_JV gene 3 1196 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 95 1 1269 1389 YP_009345136.1 0.321 350
## 127 1 1221 1320 YP_009337885.1 0.522 404
## 142 1 1179 1262 <NA> NA NA
## 143 2 1194 1262 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 95 215 0 1 350 49 365 5.005e-47 181
## 127 193 0 4 407 9 412 5.523e-138 445
## 142 NA NA NA NA NA NA NA NA
## 143 NA NA NA NA NA NA NA NA
## annotation
## 95 hypothetical protein [Wuhan cricket virus 2]
## 127 RdRp [Hubei tetragnatha maxillosa virus 8]
## 142 <NA>
## 143 <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 95 contig_13124_62_1330_- contig_13124 getorf_JV gene 62 1330 . FALSE
## 127 contig_14174_3_1223_- contig_14174 getorf_JV gene 3 1223 . FALSE
## 142 contig_15227_3_1181_- contig_15227 getorf_JV gene 3 1181 . FALSE
## 143 contig_15227_3_1196_- contig_15227 getorf_JV gene 3 1196 . FALSE
## phase attributes seq_length subject_id identity alignment_length
## 95 1 1269 1389 YP_009345136.1 0.321 350
## 127 1 1221 1320 YP_009337885.1 0.522 404
## 142 1 1179 1262 <NA> NA NA
## 143 2 1194 1262 <NA> NA NA
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 95 215 0 1 350 49 365 5.005e-47 181
## 127 193 0 4 407 9 412 5.523e-138 445
## 142 NA NA NA NA NA NA NA NA
## 143 NA NA NA NA NA NA NA NA
## annotation
## 95 hypothetical protein [Wuhan cricket virus 2]
## 127 RdRp [Hubei tetragnatha maxillosa virus 8]
## 142 <NA>
## 143 <NA>
The unannotated contig shows no hits with blastx against nr. However, it has a clear ORF (found with getorf option 0), so we include it.
It is unclear whether these three contigs belong to the same genome. However, both Wuhan cricket virus 2 (the closest relative of contig_13124) and Hubei_tetragnatha_maxillosa_virus_8 (the closest relative of contig_14174) are segmented viruses:
https://www.genome.jp/virushostdb/1923697 (6 segments)
https://www.genome.jp/virushostdb/1923250 (4 segments)
Belong to Partiti-Picobirna. See fig 4 Shi et al.
see : Unprecedented genomic diversity of RNA viruses in arthropods reveals the ancestry of negative-sense RNA viruses. Elife. 2015 Jan 29;4.
We built a phylogeny based on RdRp gene :
# Draw the PhyML tree built for the contig_14174 ORF and its homologs
# (plot_phylogeny is defined elsewhere), then override the x scale so the
# plotted range is 0-3.
p <- plot_phylogeny(
  file = "../phylogenies/contig_14174_3_1223_-_with_homologs-PhyML_tree",
  taxo_info = wta_taxo_info
) + xlim(0, 3)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
# Export the contig_14174 tree as a PDF; no size given, so ggsave uses its
# default dimensions.
ggsave(
  filename = "../phylogenies/contig_14174_3_1223_-.pdf",
  plot = p
)
## Saving 7 x 5 in image
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
p
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
## Warning in FUN(X[[i]], ...): NAs introduced by coercion
Define the corresponding contigs :
# Contigs assigned to Eccles virus; there are no uncertain contigs, so the
# unassigned slot holds an NA placeholder.
contig_set <- c(
  "contig_19093", "contig_12071", "contig_23064", "contig_8887",
  "contig_6006", "contig_3164", "contig_5214"
)
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Eccles_virus_D.sub"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Eccles_virus_D.sub"
)
## orf_name seqid source type start end score strand
## 66 contig_12071_65_1192_- contig_12071 getorf_JV gene 65 1192 . FALSE
## 230 contig_19093_2_946_- contig_19093 getorf_JV gene 2 946 . FALSE
## 309 contig_23064_38_2905_+ contig_23064 getorf_JV gene 38 2905 . TRUE
## 327 contig_3164_1_3798_- contig_3164 getorf_JV gene 1 3798 . FALSE
## 345 contig_5214_65_2611_+ contig_5214 getorf_JV gene 65 2611 . TRUE
## 358 contig_6006_4_2379_+ contig_6006 getorf_JV gene 4 2379 . TRUE
## 428 contig_8887_1_1398_- contig_8887 getorf_JV gene 1 1398 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 66 1 1128 1468 AWA82242.1 1.000 376 0
## 230 1 945 1098 AWA82237.1 0.765 316 74
## 309 1 2868 2915 AWA82240.1 1.000 956 0
## 327 1 3798 3909 AWA82238.1 0.998 803 2
## 345 1 2547 2671 AWA82239.1 0.967 849 28
## 358 1 2376 2380 AWA82237.1 0.998 792 2
## 428 1 1398 1517 AWA82241.1 0.993 466 3
## gap_opens qstart qend sstart send evalue bitscore
## 66 0 1 376 100 475 7.604e-247 758
## 230 0 1 315 130 445 4.258e-147 467
## 309 0 1 956 126 1081 0.000e+00 1917
## 327 0 320 1122 1 803 0.000e+00 1625
## 345 0 1 849 192 1040 0.000e+00 1639
## 358 0 1 792 543 1334 0.000e+00 1572
## 428 0 1 466 117 582 4.160e-311 949
## annotation
## 66 hypothetical protein [Eccles virus]
## 230 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 309 hypothetical protein [Eccles virus]
## 327 hypothetical protein, partial [Eccles virus]
## 345 hypothetical protein [Eccles virus]
## 358 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 428 hypothetical protein [Eccles virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 66 contig_12071_65_1192_- contig_12071 getorf_JV gene 65 1192 . FALSE
## 230 contig_19093_2_946_- contig_19093 getorf_JV gene 2 946 . FALSE
## 309 contig_23064_38_2905_+ contig_23064 getorf_JV gene 38 2905 . TRUE
## 327 contig_3164_1_3798_- contig_3164 getorf_JV gene 1 3798 . FALSE
## 345 contig_5214_65_2611_+ contig_5214 getorf_JV gene 65 2611 . TRUE
## 358 contig_6006_4_2379_+ contig_6006 getorf_JV gene 4 2379 . TRUE
## 428 contig_8887_1_1398_- contig_8887 getorf_JV gene 1 1398 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 66 1 1128 1468 AWA82242.1 1.000 376 0
## 230 1 945 1098 AWA82237.1 0.765 316 74
## 309 1 2868 2915 AWA82240.1 1.000 956 0
## 327 1 3798 3909 AWA82238.1 0.998 803 2
## 345 1 2547 2671 AWA82239.1 0.967 849 28
## 358 1 2376 2380 AWA82237.1 0.998 792 2
## 428 1 1398 1517 AWA82241.1 0.993 466 3
## gap_opens qstart qend sstart send evalue bitscore
## 66 0 1 376 100 475 7.604e-247 758
## 230 0 1 315 130 445 4.258e-147 467
## 309 0 1 956 126 1081 0.000e+00 1917
## 327 0 320 1122 1 803 0.000e+00 1625
## 345 0 1 849 192 1040 0.000e+00 1639
## 358 0 1 792 543 1334 0.000e+00 1572
## 428 0 1 466 117 582 4.160e-311 949
## annotation
## 66 hypothetical protein [Eccles virus]
## 230 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 309 hypothetical protein [Eccles virus]
## 327 hypothetical protein, partial [Eccles virus]
## 345 hypothetical protein [Eccles virus]
## 358 putative RNA dependent RNA polymerase, partial [Eccles virus]
## 428 hypothetical protein [Eccles virus]
Eccles virus was reported by Medd et al. 2018 in D. suzukii where 6 segments were detected.
Define the corresponding contigs :
# Contigs assigned to La Jolla virus; there are no uncertain contigs, so the
# unassigned slot holds an NA placeholder.
contig_set <- c(
  "contig_4993",
  "contig_5731",
  "contig_22835",
  "contig_20871"
)
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["LaJolla_virus"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "LaJolla_virus"
)
## orf_name seqid source type start end score strand
## 265 contig_20871_140_337_- contig_20871 getorf_JV gene 140 337 . FALSE
## 266 contig_20871_324_1043_+ contig_20871 getorf_JV gene 324 1043 . TRUE
## 303 contig_22835_1_4500_- contig_22835 getorf_JV gene 1 4500 . FALSE
## 343 contig_4993_881_2755_+ contig_4993 getorf_JV gene 881 2755 . TRUE
## 354 contig_5731_64_1650_+ contig_5731 getorf_JV gene 64 1650 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 265 1 198 1043 <NA> NA NA NA
## 266 1 720 1043 AWY11061.1 1.000 240 0
## 303 1 4500 4520 AKH40286.1 0.980 1500 30
## 343 1 1875 2757 AKH40286.1 0.985 624 9
## 354 1 1587 1652 AKH40286.1 0.994 529 3
## gap_opens qstart qend sstart send evalue bitscore
## 265 NA NA NA NA NA NA NA
## 266 0 1 240 2067 2306 2.273e-170 529
## 303 0 1 1500 635 2134 0.000e+00 3028
## 343 0 1 624 1 624 0.000e+00 1263
## 354 0 1 529 2330 2858 0.000e+00 1106
## annotation
## 265 <NA>
## 266 putative polyprotein [La Jolla virus]
## 303 putative polyprotein, partial [La Jolla virus]
## 343 putative polyprotein, partial [La Jolla virus]
## 354 putative polyprotein, partial [La Jolla virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 265 contig_20871_140_337_- contig_20871 getorf_JV gene 140 337 . FALSE
## 266 contig_20871_324_1043_+ contig_20871 getorf_JV gene 324 1043 . TRUE
## 303 contig_22835_1_4500_- contig_22835 getorf_JV gene 1 4500 . FALSE
## 343 contig_4993_881_2755_+ contig_4993 getorf_JV gene 881 2755 . TRUE
## 354 contig_5731_64_1650_+ contig_5731 getorf_JV gene 64 1650 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 265 1 198 1043 <NA> NA NA NA
## 266 1 720 1043 AWY11061.1 1.000 240 0
## 303 1 4500 4520 AKH40286.1 0.980 1500 30
## 343 1 1875 2757 AKH40286.1 0.985 624 9
## 354 1 1587 1652 AKH40286.1 0.994 529 3
## gap_opens qstart qend sstart send evalue bitscore
## 265 NA NA NA NA NA NA NA
## 266 0 1 240 2067 2306 2.273e-170 529
## 303 0 1 1500 635 2134 0.000e+00 3028
## 343 0 1 624 1 624 0.000e+00 1263
## 354 0 1 529 2330 2858 0.000e+00 1106
## annotation
## 265 <NA>
## 266 putative polyprotein [La Jolla virus]
## 303 putative polyprotein, partial [La Jolla virus]
## 343 putative polyprotein, partial [La Jolla virus]
## 354 putative polyprotein, partial [La Jolla virus]
La Jolla virus was reported in Webster et al. 2016 and Medd et al. It is very frequent in D. melanogaster, also found in D. simulans and D. suzukii.
Define the corresponding contigs :
# Contigs assigned to Craigies Hill virus; there are no uncertain contigs, so
# the unassigned slot holds an NA placeholder.
contig_set <- c("contig_17257", "contig_19996")
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["CraigiesHill_virus_Dmel"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "CraigiesHill_virus_Dmel"
)
## orf_name seqid source type start end score strand
## 185 contig_17257_75_1004_+ contig_17257 getorf_JV gene 75 1004 . TRUE
## 243 contig_19996_246_1067_+ contig_19996 getorf_JV gene 246 1067 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 185 1 930 1167 AWY11116.1 1 310 0
## 243 1 822 1069 AKH67449.1 1 274 0
## gap_opens qstart qend sstart send evalue bitscore
## 185 0 1 310 1 310 1.117e-207 641
## 243 0 1 274 463 736 4.341e-183 568
## annotation
## 185 orf1 [Craigies Hill virus]
## 243 hypothetical protein, partial [Craigies Hill virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 185 contig_17257_75_1004_+ contig_17257 getorf_JV gene 75 1004 . TRUE
## 243 contig_19996_246_1067_+ contig_19996 getorf_JV gene 246 1067 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 185 1 930 1167 AWY11116.1 1 310 0
## 243 1 822 1069 AKH67449.1 1 274 0
## gap_opens qstart qend sstart send evalue bitscore
## 185 0 1 310 1 310 1.117e-207 641
## 243 0 1 274 463 736 4.341e-183 568
## annotation
## 185 orf1 [Craigies Hill virus]
## 243 hypothetical protein, partial [Craigies Hill virus]
CraigiesHill_virus was reported in Webster et al. 2016 in D. melanogaster.
Define the corresponding contigs :
# Single contig assigned to Muthill virus; there are no uncertain contigs, so
# the unassigned slot holds an NA placeholder.
contig_set <- "contig_1269"
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Muthill_virus_D.im"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Muthill_virus_D.im"
)
## orf_name seqid source type start end score strand
## 79 contig_1269_106_456_- contig_1269 getorf_JV gene 106 456 . FALSE
## 80 contig_1269_1060_1761_- contig_1269 getorf_JV gene 1060 1761 . FALSE
## 81 contig_1269_1800_3650_- contig_1269 getorf_JV gene 1800 3650 . FALSE
## 82 contig_1269_3699_7634_- contig_1269 getorf_JV gene 3699 7634 . FALSE
## 83 contig_1269_486_1019_- contig_1269 getorf_JV gene 486 1019 . FALSE
## 84 contig_1269_7688_10429_- contig_1269 getorf_JV gene 7688 10429 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 79 1 351 10490 AVZ66287.1 0.614 114 44
## 80 1 702 10490 AVZ66285.1 0.723 228 63
## 81 1 1851 10490 AMO03224.1 1.000 617 0
## 82 1 3936 10490 AMO03223.1 0.996 1312 5
## 83 1 534 10490 AVZ66286.1 0.764 178 42
## 84 1 2742 10490 AMO03223.1 0.988 796 10
## gap_opens qstart qend sstart send evalue bitscore
## 79 0 1 114 21 134 4.967e-38 141
## 80 0 5 232 3 230 2.271e-107 347
## 81 0 1 617 1 617 0.000e+00 1250
## 82 0 1 1312 933 2244 0.000e+00 2709
## 83 0 1 178 1 178 1.457e-83 276
## 84 0 1 796 1 796 0.000e+00 1620
## annotation
## 79 hypothetical protein [Brandeis virus]
## 80 hypothetical protein [Brandeis virus]
## 81 hypothetical protein [Muthill virus]
## 82 polyprotein [Muthill virus]
## 83 hypothetical protein [Brandeis virus]
## 84 polyprotein [Muthill virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 79 contig_1269_106_456_- contig_1269 getorf_JV gene 106 456 . FALSE
## 80 contig_1269_1060_1761_- contig_1269 getorf_JV gene 1060 1761 . FALSE
## 81 contig_1269_1800_3650_- contig_1269 getorf_JV gene 1800 3650 . FALSE
## 82 contig_1269_3699_7634_- contig_1269 getorf_JV gene 3699 7634 . FALSE
## 83 contig_1269_486_1019_- contig_1269 getorf_JV gene 486 1019 . FALSE
## 84 contig_1269_7688_10429_- contig_1269 getorf_JV gene 7688 10429 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 79 1 351 10490 AVZ66287.1 0.614 114 44
## 80 1 702 10490 AVZ66285.1 0.723 228 63
## 81 1 1851 10490 AMO03224.1 1.000 617 0
## 82 1 3936 10490 AMO03223.1 0.996 1312 5
## 83 1 534 10490 AVZ66286.1 0.764 178 42
## 84 1 2742 10490 AMO03223.1 0.988 796 10
## gap_opens qstart qend sstart send evalue bitscore
## 79 0 1 114 21 134 4.967e-38 141
## 80 0 5 232 3 230 2.271e-107 347
## 81 0 1 617 1 617 0.000e+00 1250
## 82 0 1 1312 933 2244 0.000e+00 2709
## 83 0 1 178 1 178 1.457e-83 276
## 84 0 1 796 1 796 0.000e+00 1620
## annotation
## 79 hypothetical protein [Brandeis virus]
## 80 hypothetical protein [Brandeis virus]
## 81 hypothetical protein [Muthill virus]
## 82 polyprotein [Muthill virus]
## 83 hypothetical protein [Brandeis virus]
## 84 polyprotein [Muthill virus]
Muthill virus was reported in Webster et al. 2016 and found in D. immigrans.
Define the corresponding contigs :
# Single contig for this entry; its phylogeny is covered in another paragraph.
contig_set <- "contig_7817"
for phylogeny, see paragraph 2.3 (Chaq virus in D. mel).
Define the corresponding contigs :
# Contigs assigned to Nora virus; there are no uncertain contigs, so the
# unassigned slot holds an NA placeholder.
contig_set <- c("contig_1582", "contig_22830")
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Noravirus_D.im"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Noravirus_D.im"
)
## orf_name seqid source type start end score
## 148 contig_1582_3_4766_- contig_1582 getorf_JV gene 3 4766 .
## 149 contig_1582_4772_6226_- contig_1582 getorf_JV gene 4772 6226 .
## 300 contig_22830_3250_4134_- contig_22830 getorf_JV gene 3250 4134 .
## 301 contig_22830_410_3184_- contig_22830 getorf_JV gene 410 3184 .
## 302 contig_22830_4112_4390_- contig_22830 getorf_JV gene 4112 4390 .
## strand phase attributes seq_length subject_id identity alignment_length
## 148 FALSE 1 4764 6778 ABC55268.2 0.575 1648
## 149 FALSE 1 1455 6778 AHZ92153.1 0.995 485
## 300 FALSE 1 885 4474 AHZ92154.1 0.993 295
## 301 FALSE 1 2775 4474 ABC55270.1 0.810 925
## 302 FALSE 1 279 4474 AHZ92156.1 1.000 93
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 148 674 0 1 1588 1 1648 0.000e+00 1875
## 149 2 0 1 485 1 485 2.590e-310 947
## 300 2 0 1 295 1 295 5.381e-175 546
## 301 174 0 1 925 1 918 0.000e+00 1532
## 302 0 0 1 93 1951 2043 1.120e-59 202
## annotation
## 148 replication polyprotein [Nora virus]
## 149 VP1 [Drosophila immigrans Nora virus]
## 300 VP3 [Drosophila immigrans Nora virus]
## 301 ORF 4 [Nora virus]
## 302 VP2 [Drosophila immigrans Nora virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score
## 148 contig_1582_3_4766_- contig_1582 getorf_JV gene 3 4766 .
## 149 contig_1582_4772_6226_- contig_1582 getorf_JV gene 4772 6226 .
## 300 contig_22830_3250_4134_- contig_22830 getorf_JV gene 3250 4134 .
## 301 contig_22830_410_3184_- contig_22830 getorf_JV gene 410 3184 .
## 302 contig_22830_4112_4390_- contig_22830 getorf_JV gene 4112 4390 .
## strand phase attributes seq_length subject_id identity alignment_length
## 148 FALSE 1 4764 6778 ABC55268.2 0.575 1648
## 149 FALSE 1 1455 6778 AHZ92153.1 0.995 485
## 300 FALSE 1 885 4474 AHZ92154.1 0.993 295
## 301 FALSE 1 2775 4474 ABC55270.1 0.810 925
## 302 FALSE 1 279 4474 AHZ92156.1 1.000 93
## mismatches gap_opens qstart qend sstart send evalue bitscore
## 148 674 0 1 1588 1 1648 0.000e+00 1875
## 149 2 0 1 485 1 485 2.590e-310 947
## 300 2 0 1 295 1 295 5.381e-175 546
## 301 174 0 1 925 1 918 0.000e+00 1532
## 302 0 0 1 93 1951 2043 1.120e-59 202
## annotation
## 148 replication polyprotein [Nora virus]
## 149 VP1 [Drosophila immigrans Nora virus]
## 300 VP3 [Drosophila immigrans Nora virus]
## 301 ORF 4 [Nora virus]
## 302 VP2 [Drosophila immigrans Nora virus]
Define the corresponding contigs :
# Contigs assigned to Bloomfield virus; there are no uncertain contigs, so the
# unassigned slot holds an NA placeholder.
contig_set <- c(
  "contig_13750",
  "contig_6964",
  "contig_17142"
)
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Bloomfield_virus_D.mel"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Bloomfield_virus_D.mel"
)
## orf_name seqid source type start end score strand
## 119 contig_13750_119_1306_- contig_13750 getorf_JV gene 119 1306 . FALSE
## 184 contig_17142_383_1171_+ contig_17142 getorf_JV gene 383 1171 . TRUE
## 376 contig_6964_1_2091_- contig_6964 getorf_JV gene 1 2091 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 119 1 1188 1344 AKH40315.1 0.997 396 1
## 184 1 789 1171 AKH40312.1 1.000 263 0
## 376 1 2091 2132 AKH40311.1 0.988 696 8
## gap_opens qstart qend sstart send evalue bitscore
## 119 0 1 396 151 546 1.133e-247 762
## 184 0 1 263 688 950 2.625e-170 531
## 376 0 1 696 509 1204 0.000e+00 1385
## annotation
## 119 ORF1 [Bloomfield virus]
## 184 ORF1 [Bloomfield virus]
## 376 putative major core protein [Bloomfield virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 119 contig_13750_119_1306_- contig_13750 getorf_JV gene 119 1306 . FALSE
## 184 contig_17142_383_1171_+ contig_17142 getorf_JV gene 383 1171 . TRUE
## 376 contig_6964_1_2091_- contig_6964 getorf_JV gene 1 2091 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 119 1 1188 1344 AKH40315.1 0.997 396 1
## 184 1 789 1171 AKH40312.1 1.000 263 0
## 376 1 2091 2132 AKH40311.1 0.988 696 8
## gap_opens qstart qend sstart send evalue bitscore
## 119 0 1 396 151 546 1.133e-247 762
## 184 0 1 263 688 950 2.625e-170 531
## 376 0 1 696 509 1204 0.000e+00 1385
## annotation
## 119 ORF1 [Bloomfield virus]
## 184 ORF1 [Bloomfield virus]
## 376 putative major core protein [Bloomfield virus]
Define the corresponding contigs :
# Single contig assigned to Prestney Burn virus; there are no uncertain
# contigs, so the unassigned slot holds an NA placeholder.
contig_set <- "contig_5504"
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Prestney_Burn_D.sub|obs"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Prestney_Burn_D.sub|obs"
)
## orf_name seqid source type start end score strand
## 348 contig_5504_1_468_- contig_5504 getorf_JV gene 1 468 . FALSE
## 349 contig_5504_528_1862_- contig_5504 getorf_JV gene 528 1862 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 348 1 468 1912 AWY11067.1 0.935 156 10
## 349 1 1335 1912 AMO03210.1 0.993 445 3
## gap_opens qstart qend sstart send evalue bitscore
## 348 0 1 156 1 156 4.938e-97 313
## 349 0 1 445 134 578 2.114e-285 873
## annotation
## 348 putative RNA-dependent RNA polymerase [Motts Mill virus]
## 349 hypothetical protein 1 [Prestney Burn virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 348 contig_5504_1_468_- contig_5504 getorf_JV gene 1 468 . FALSE
## 349 contig_5504_528_1862_- contig_5504 getorf_JV gene 528 1862 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 348 1 468 1912 AWY11067.1 0.935 156 10
## 349 1 1335 1912 AMO03210.1 0.993 445 3
## gap_opens qstart qend sstart send evalue bitscore
## 348 0 1 156 1 156 4.938e-97 313
## 349 0 1 445 134 578 2.114e-285 873
## annotation
## 348 putative RNA-dependent RNA polymerase [Motts Mill virus]
## 349 hypothetical protein 1 [Prestney Burn virus]
Define the corresponding contigs :
# Single contig assigned to Motts Mill virus; there are no uncertain contigs,
# so the unassigned slot holds an NA placeholder.
contig_set <- "contig_12896"
contig_set_unassigned <- NA
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Motts_Mill_D.sub|obs"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Motts_Mill_D.sub|obs"
)
## orf_name seqid source type start end score strand
## 86 contig_12896_10_615_+ contig_12896 getorf_JV gene 10 615 . TRUE
## 87 contig_12896_739_1119_+ contig_12896 getorf_JV gene 739 1119 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 86 1 606 1121 AWY11140.1 0.851 202 30
## 87 1 381 1121 AKH40293.1 0.488 125 63
## gap_opens qstart qend sstart send evalue bitscore
## 86 0 1 202 1 202 9.484e-111 355
## 87 0 2 126 14 138 1.168e-31 123
## annotation
## 86 orf1 [Motts Mill virus]
## 87 orf2, partial [Motts Mill virus]
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 86 contig_12896_10_615_+ contig_12896 getorf_JV gene 10 615 . TRUE
## 87 contig_12896_739_1119_+ contig_12896 getorf_JV gene 739 1119 . TRUE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 86 1 606 1121 AWY11140.1 0.851 202 30
## 87 1 381 1121 AKH40293.1 0.488 125 63
## gap_opens qstart qend sstart send evalue bitscore
## 86 0 1 202 1 202 9.484e-111 355
## 87 0 2 126 14 138 1.168e-31 123
## annotation
## 86 orf1 [Motts Mill virus]
## 87 orf2, partial [Motts Mill virus]
Define the corresponding contigs :
# "Dark1" has no confidently assigned contig (NA placeholder); its only contig
# is recorded as unassigned.
contig_set <- NA
contig_set_unassigned <- "contig_20830"
# Register both sets in virus_list; the entries are merged into the summary
# table later on.
virus_list[["Dark1"]] <- list(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned
)
# plot_orfs is defined elsewhere; its result is inspected via res[1] / res[2].
res <- plot_orfs(
  contig_set = contig_set,
  contig_set_unassigned = contig_set_unassigned,
  gff = gff_wta2,
  name = "Dark1"
)
## orf_name seqid source type start end score strand
## 263 contig_20830_68_1042_- contig_20830 getorf_JV gene 68 1042 . FALSE
## 264 contig_20830_68_778_- contig_20830 getorf_JV gene 68 778 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 263 2 975 1044 <NA> NA NA NA
## 264 1 711 1044 <NA> NA NA NA
## gap_opens qstart qend sstart send evalue bitscore annotation
## 263 NA NA NA NA NA NA NA <NA>
## 264 NA NA NA NA NA NA NA <NA>
## Saving 7 x 5 in image
res[1]
## [[1]]
res[2]
## [[1]]
## orf_name seqid source type start end score strand
## 263 contig_20830_68_1042_- contig_20830 getorf_JV gene 68 1042 . FALSE
## 264 contig_20830_68_778_- contig_20830 getorf_JV gene 68 778 . FALSE
## phase attributes seq_length subject_id identity alignment_length mismatches
## 263 2 975 1044 <NA> NA NA NA
## 264 1 711 1044 <NA> NA NA NA
## gap_opens qstart qend sstart send evalue bitscore annotation
## 263 NA NA NA NA NA NA NA <NA>
## 264 NA NA NA NA NA NA NA <NA>
contig_20830 => MAG: hypothetical protein [Diaphorina citri cimodo-like virus], Sequence ID: QXG83187.1, Length: 636, e-value 0.006
head(virus_list)
## $Parvoviridae_Pachy
## $Parvoviridae_Pachy$contig_set
## [1] "contig_2320"
##
## $Parvoviridae_Pachy$contig_set_unassigned
## [1] NA
##
##
## $Vesantovirus_D.sub
## $Vesantovirus_D.sub$contig_set
## [1] "contig_2799" "contig_14992" "contig_2780" "contig_2857" "contig_22871"
## [6] "contig_2659" "contig_8503" "contig_15585"
##
## $Vesantovirus_D.sub$contig_set_unassigned
## [1] "contig_7654" "contig_17519"
##
##
## $Parvoviridae2
## $Parvoviridae2$contig_set
## [1] "contig_15192"
##
## $Parvoviridae2$contig_set_unassigned
## [1] NA
##
##
## $Linvill_road_virus_D.sim
## $Linvill_road_virus_D.sim$contig_set
## [1] "contig_627" "contig_626"
##
## $Linvill_road_virus_D.sim$contig_set_unassigned
## [1] NA
##
##
## $LbFV_L.b
## $LbFV_L.b$contig_set
## [1] "contig_1505" "contig_22345" "contig_1350" "contig_22895" "contig_22365"
## [6] "contig_22449" "contig_22533" "contig_19307" "contig_12283" "contig_22381"
##
## $LbFV_L.b$contig_set_unassigned
## [1] NA
##
##
## $LhFV_L.h
## $LhFV_L.h$contig_set
## [1] "contig_9355" "contig_21206" "contig_19696" "contig_3127" "contig_356"
## [6] "contig_682" "contig_22485" "contig_223" "contig_701" "contig_22588"
## [11] "contig_2709" "contig_19153"
##
## $LhFV_L.h$contig_set_unassigned
## [1] NA
# Flatten virus_list into one long table with one row per contig.
#
# For every virus entry:
#   - NA placeholders in contig_set / contig_set_unassigned are dropped;
#   - virus_name is the entry name followed by " n=<number of contigs>";
#   - homologs is TRUE for contigs from contig_set and FALSE for contigs from
#     contig_set_unassigned.
#
# seq_along() is used instead of 1:length() so that an empty virus_list yields
# no iterations (1:0 would try to read virus_list[[1]] and fail), and `[[`
# is used for exact name matching on the two list slots.
list_res <- lapply(seq_along(virus_list), function(i) {
  entry <- virus_list[[i]]
  assigned <- entry[["contig_set"]]
  assigned <- assigned[!is.na(assigned)]
  unassigned <- entry[["contig_set_unassigned"]]
  unassigned <- unassigned[!is.na(unassigned)]
  n1 <- length(assigned)
  n2 <- length(unassigned)
  n <- n1 + n2
  data.frame(
    virus_name = rep(paste0(names(virus_list)[i], " n=", n), n),
    contig_name = c(assigned, unassigned),
    homologs = c(rep(TRUE, n1), rep(FALSE, n2))
  )
})
# Stack the per-virus tables and save the result as a tab-separated file.
summary_table <- do.call(what = rbind.data.frame, list_res)
write.table(
  summary_table,
  file = "../TABLES/summary_table.txt",
  col.names = TRUE,
  row.names = FALSE,
  quote = FALSE,
  sep = "\t"
)
head(summary_table)
Check that all contigs have been analyzed.
# Sanity check: every contig listed in the two coverage tables read below
# should also appear in summary_table$contig_name.
wga_table2 <- read.table(
  "../TABLES/wga_cov2_viruses_plus_unassigned_clean2.tab.txt",
  header = TRUE,
  sep = "\t"
)
wta_table2 <- read.table(
  file = "../TABLES/wta_cov2_viruses_plus_unassigned2.tab.txt",
  header = TRUE,
  sep = "\t"
)

# Contig identifiers of a coverage table. The same source is used both to
# build the list of contigs and to filter the tables: previously the list came
# from rownames() while the filter used the contig_name column, so with
# automatic row names ("1", "2", ...) the check could never report anything.
# Falls back to the row names when the table has no contig_name column.
get_contig_ids <- function(tab) {
  if ("contig_name" %in% names(tab)) {
    as.character(tab[["contig_name"]])
  } else {
    rownames(tab)
  }
}

wga_contigs <- get_contig_ids(wga_table2)
wta_contigs <- get_contig_ids(wta_table2)
all_contigs <- c(wga_contigs, wta_contigs)
not_analyzed <- setdiff(all_contigs, summary_table$contig_name)

# Rows of each table that are still missing from the summary; zero rows in
# both outputs means every contig has been analyzed.
wga_table2[wga_contigs %in% not_analyzed, ]
wta_table2[wta_contigs %in% not_analyzed, ]